##// END OF EJS Templates
fsmonitor: layer on another hack in bser.c for os.stat() compat (issue5811)...
Augie Fackler -
r37833:b1f62cd3 default
parent child Browse files
Show More
@@ -1,1212 +1,1223 b''
1 1 /*
2 2 Copyright (c) 2013-2015, Facebook, Inc.
3 3 All rights reserved.
4 4
5 5 Redistribution and use in source and binary forms, with or without
6 6 modification, are permitted provided that the following conditions are met:
7 7
8 8 * Redistributions of source code must retain the above copyright notice,
9 9 this list of conditions and the following disclaimer.
10 10
11 11 * Redistributions in binary form must reproduce the above copyright notice,
12 12 this list of conditions and the following disclaimer in the documentation
13 13 and/or other materials provided with the distribution.
14 14
15 15 * Neither the name Facebook nor the names of its contributors may be used to
16 16 endorse or promote products derived from this software without specific
17 17 prior written permission.
18 18
19 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 29 */
30 30
31 31 #include <Python.h>
32 32 #include <bytesobject.h>
33 33 #ifdef _MSC_VER
34 34 #define inline __inline
35 35 #if _MSC_VER >= 1800
36 36 #include <stdint.h>
37 37 #else
38 38 // The compiler associated with Python 2.7 on Windows doesn't ship
39 39 // with stdint.h, so define the small subset that we use here.
40 40 typedef __int8 int8_t;
41 41 typedef __int16 int16_t;
42 42 typedef __int32 int32_t;
43 43 typedef __int64 int64_t;
44 44 typedef unsigned __int8 uint8_t;
45 45 typedef unsigned __int16 uint16_t;
46 46 typedef unsigned __int32 uint32_t;
47 47 typedef unsigned __int64 uint64_t;
48 48 #define UINT32_MAX 4294967295U
49 49 #endif
50 50 #endif
51 51
52 52 // clang-format off
/* Return the size in bytes (1, 2, 4 or 8) of the smallest signed integer
 * type that can hold x without loss.
 *
 * The argument is parenthesized inside every cast so that a compound
 * expression such as INT_SIZE(a + b) is narrowed as a whole rather than
 * having the cast bind to its first operand only.  Note that x is
 * evaluated more than once, so it must not have side effects. */
#define INT_SIZE(x) (((x) == ((int8_t)(x)))  ? 1 : \
                     ((x) == ((int16_t)(x))) ? 2 : \
                     ((x) == ((int32_t)(x))) ? 4 : 8)
57 57
58 58 #define BSER_ARRAY 0x00
59 59 #define BSER_OBJECT 0x01
60 60 #define BSER_BYTESTRING 0x02
61 61 #define BSER_INT8 0x03
62 62 #define BSER_INT16 0x04
63 63 #define BSER_INT32 0x05
64 64 #define BSER_INT64 0x06
65 65 #define BSER_REAL 0x07
66 66 #define BSER_TRUE 0x08
67 67 #define BSER_FALSE 0x09
68 68 #define BSER_NULL 0x0a
69 69 #define BSER_TEMPLATE 0x0b
70 70 #define BSER_SKIP 0x0c
71 71 #define BSER_UTF8STRING 0x0d
72 72 // clang-format on
73 73
74 74 // An immutable object representation of BSER_OBJECT.
75 75 // Rather than build a hash table, key -> value are obtained
76 76 // by walking the list of keys to determine the offset into
77 77 // the values array. The assumption is that the number of
78 78 // array elements will be typically small (~6 for the top
79 79 // level query result and typically 3-5 for the file entries)
80 80 // so that the time overhead for this is small compared to
81 81 // using a proper hash table. Even with this simplistic
82 82 // approach, this is still faster for the mercurial use case
83 83 // as it helps to eliminate creating N other objects to
84 84 // represent the stat information in the hgwatchman extension
85 85 // clang-format off
86 86 typedef struct {
87 87 PyObject_HEAD
88 88 PyObject *keys; // tuple of field names
89 89 PyObject *values; // tuple of values
90 90 } bserObject;
91 91 // clang-format on
92 92
93 93 static Py_ssize_t bserobj_tuple_length(PyObject* o) {
94 94 bserObject* obj = (bserObject*)o;
95 95
96 96 return PySequence_Length(obj->keys);
97 97 }
98 98
99 99 static PyObject* bserobj_tuple_item(PyObject* o, Py_ssize_t i) {
100 100 bserObject* obj = (bserObject*)o;
101 101
102 102 return PySequence_GetItem(obj->values, i);
103 103 }
104 104
105 105 // clang-format off
106 106 static PySequenceMethods bserobj_sq = {
107 107 bserobj_tuple_length, /* sq_length */
108 108 0, /* sq_concat */
109 109 0, /* sq_repeat */
110 110 bserobj_tuple_item, /* sq_item */
111 111 0, /* sq_ass_item */
112 112 0, /* sq_contains */
113 113 0, /* sq_inplace_concat */
114 114 0 /* sq_inplace_repeat */
115 115 };
116 116 // clang-format on
117 117
118 118 static void bserobj_dealloc(PyObject* o) {
119 119 bserObject* obj = (bserObject*)o;
120 120
121 121 Py_CLEAR(obj->keys);
122 122 Py_CLEAR(obj->values);
123 123 PyObject_Del(o);
124 124 }
125 125
126 126 static PyObject* bserobj_getattrro(PyObject* o, PyObject* name) {
127 127 bserObject* obj = (bserObject*)o;
128 128 Py_ssize_t i, n;
129 129 PyObject* name_bytes = NULL;
130 130 PyObject* ret = NULL;
131 const char* namestr;
131 const char* namestr = NULL;
132 132
133 133 if (PyIndex_Check(name)) {
134 134 i = PyNumber_AsSsize_t(name, PyExc_IndexError);
135 135 if (i == -1 && PyErr_Occurred()) {
136 136 goto bail;
137 137 }
138 ret = PySequence_GetItem(obj->values, i);
139 goto bail;
140 }
141 138
142 // We can be passed in Unicode objects here -- we don't support anything other
143 // than UTF-8 for keys.
144 if (PyUnicode_Check(name)) {
145 name_bytes = PyUnicode_AsUTF8String(name);
146 if (name_bytes == NULL) {
139 if (i == 8 && PySequence_Size(obj->values) < 9) {
140 // Hack alert: Python 3 removed support for os.stat().st_mtime
141 // being an integer.Instead, if you need an integer, you have to
142 // use os.stat()[stat.ST_MTIME] instead. stat.ST_MTIME is 8, and
143 // our stat tuples are shorter than that, so we can detect
144 // requests for index 8 on tuples shorter than that and return
145 // st_mtime instead.
146 namestr = "st_mtime";
147 } else {
148 ret = PySequence_GetItem(obj->values, i);
147 149 goto bail;
148 150 }
149 namestr = PyBytes_AsString(name_bytes);
150 151 } else {
151 namestr = PyBytes_AsString(name);
152 // We can be passed in Unicode objects here -- we don't support anything other
153 // than UTF-8 for keys.
154 if (PyUnicode_Check(name)) {
155 name_bytes = PyUnicode_AsUTF8String(name);
156 if (name_bytes == NULL) {
157 goto bail;
158 }
159 namestr = PyBytes_AsString(name_bytes);
160 } else {
161 namestr = PyBytes_AsString(name);
162 }
152 163 }
153 164
154 165 if (namestr == NULL) {
155 166 goto bail;
156 167 }
157 168 // hack^Wfeature to allow mercurial to use "st_size" to reference "size"
158 169 if (!strncmp(namestr, "st_", 3)) {
159 170 namestr += 3;
160 171 }
161 172
162 173 n = PyTuple_GET_SIZE(obj->keys);
163 174 for (i = 0; i < n; i++) {
164 175 const char* item_name = NULL;
165 176 PyObject* key = PyTuple_GET_ITEM(obj->keys, i);
166 177
167 178 item_name = PyBytes_AsString(key);
168 179 if (!strcmp(item_name, namestr)) {
169 180 ret = PySequence_GetItem(obj->values, i);
170 181 goto bail;
171 182 }
172 183 }
173 184
174 185 PyErr_Format(
175 186 PyExc_AttributeError, "bserobject has no attribute '%.400s'", namestr);
176 187 bail:
177 188 Py_XDECREF(name_bytes);
178 189 return ret;
179 190 }
180 191
181 192 // clang-format off
182 193 static PyMappingMethods bserobj_map = {
183 194 bserobj_tuple_length, /* mp_length */
184 195 bserobj_getattrro, /* mp_subscript */
185 196 0 /* mp_ass_subscript */
186 197 };
187 198
188 199 PyTypeObject bserObjectType = {
189 200 PyVarObject_HEAD_INIT(NULL, 0)
190 201 "bserobj_tuple", /* tp_name */
191 202 sizeof(bserObject), /* tp_basicsize */
192 203 0, /* tp_itemsize */
193 204 bserobj_dealloc, /* tp_dealloc */
194 205 0, /* tp_print */
195 206 0, /* tp_getattr */
196 207 0, /* tp_setattr */
197 208 0, /* tp_compare */
198 209 0, /* tp_repr */
199 210 0, /* tp_as_number */
200 211 &bserobj_sq, /* tp_as_sequence */
201 212 &bserobj_map, /* tp_as_mapping */
202 213 0, /* tp_hash */
203 214 0, /* tp_call */
204 215 0, /* tp_str */
205 216 bserobj_getattrro, /* tp_getattro */
206 217 0, /* tp_setattro */
207 218 0, /* tp_as_buffer */
208 219 Py_TPFLAGS_DEFAULT, /* tp_flags */
209 220 "bserobj tuple", /* tp_doc */
210 221 0, /* tp_traverse */
211 222 0, /* tp_clear */
212 223 0, /* tp_richcompare */
213 224 0, /* tp_weaklistoffset */
214 225 0, /* tp_iter */
215 226 0, /* tp_iternext */
216 227 0, /* tp_methods */
217 228 0, /* tp_members */
218 229 0, /* tp_getset */
219 230 0, /* tp_base */
220 231 0, /* tp_dict */
221 232 0, /* tp_descr_get */
222 233 0, /* tp_descr_set */
223 234 0, /* tp_dictoffset */
224 235 0, /* tp_init */
225 236 0, /* tp_alloc */
226 237 0, /* tp_new */
227 238 };
228 239 // clang-format on
229 240
// Decoding options threaded through the bunser_* / bser_loads_recursive
// call chain.
typedef struct loads_ctx {
  // Non-zero: arrays decode to lists and objects to dicts.
  // Zero: arrays decode to tuples and objects to bserObject instances.
  int mutable;
  // Codec handed to PyUnicode_Decode for BSER_BYTESTRING values; when NULL
  // such values are returned as bytes objects.
  const char* value_encoding;
  // Error-handler name handed to PyUnicode_Decode with value_encoding.
  const char* value_errors;
  // Protocol version and capabilities taken from the PDU header.
  uint32_t bser_version;
  uint32_t bser_capabilities;
} unser_ctx_t;
237 248
238 249 static PyObject*
239 250 bser_loads_recursive(const char** ptr, const char* end, const unser_ctx_t* ctx);
240 251
241 252 static const char bser_true = BSER_TRUE;
242 253 static const char bser_false = BSER_FALSE;
243 254 static const char bser_null = BSER_NULL;
244 255 static const char bser_bytestring_hdr = BSER_BYTESTRING;
245 256 static const char bser_array_hdr = BSER_ARRAY;
246 257 static const char bser_object_hdr = BSER_OBJECT;
247 258
// Copies the highest set bit of n into every lower bit position and then
// adds one.  The result is the smallest power of two strictly greater than
// n (1 when n is 0); it wraps around to 0 when bit 31 of n is set.
static inline uint32_t next_power_2(uint32_t n) {
  unsigned int shift;

  for (shift = 16; shift != 0; shift >>= 1) {
    n |= (n >> shift);
  }
  return n + 1;
}
256 267
// Growable output buffer used while building a serialized PDU.
struct bser_buffer {
  char* buf;              // malloc/realloc-managed storage
  int wpos;               // number of bytes written so far
  int allocd;             // current capacity of buf in bytes
  uint32_t bser_version;  // protocol version passed to bser_init
  uint32_t capabilities;  // capabilities passed to bser_init
};
typedef struct bser_buffer bser_t;
265 276
266 277 static int bser_append(bser_t* bser, const char* data, uint32_t len) {
267 278 int newlen = next_power_2(bser->wpos + len);
268 279 if (newlen > bser->allocd) {
269 280 char* nbuf = realloc(bser->buf, newlen);
270 281 if (!nbuf) {
271 282 return 0;
272 283 }
273 284
274 285 bser->buf = nbuf;
275 286 bser->allocd = newlen;
276 287 }
277 288
278 289 memcpy(bser->buf + bser->wpos, data, len);
279 290 bser->wpos += len;
280 291 return 1;
281 292 }
282 293
283 294 static int bser_init(bser_t* bser, uint32_t version, uint32_t capabilities) {
284 295 bser->allocd = 8192;
285 296 bser->wpos = 0;
286 297 bser->buf = malloc(bser->allocd);
287 298 bser->bser_version = version;
288 299 bser->capabilities = capabilities;
289 300 if (!bser->buf) {
290 301 return 0;
291 302 }
292 303
293 304 // Leave room for the serialization header, which includes
294 305 // our overall length. To make things simpler, we'll use an
295 306 // int32 for the header
296 307 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00"
297 308
298 309 // Version 2 also carries an integer indicating the capabilities. The
299 310 // capabilities integer comes before the PDU size.
300 311 #define EMPTY_HEADER_V2 "\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00"
301 312 if (version == 2) {
302 313 bser_append(bser, EMPTY_HEADER_V2, sizeof(EMPTY_HEADER_V2) - 1);
303 314 } else {
304 315 bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER) - 1);
305 316 }
306 317
307 318 return 1;
308 319 }
309 320
310 321 static void bser_dtor(bser_t* bser) {
311 322 free(bser->buf);
312 323 bser->buf = NULL;
313 324 }
314 325
315 326 static int bser_long(bser_t* bser, int64_t val) {
316 327 int8_t i8;
317 328 int16_t i16;
318 329 int32_t i32;
319 330 int64_t i64;
320 331 char sz;
321 332 int size = INT_SIZE(val);
322 333 char* iptr;
323 334
324 335 switch (size) {
325 336 case 1:
326 337 sz = BSER_INT8;
327 338 i8 = (int8_t)val;
328 339 iptr = (char*)&i8;
329 340 break;
330 341 case 2:
331 342 sz = BSER_INT16;
332 343 i16 = (int16_t)val;
333 344 iptr = (char*)&i16;
334 345 break;
335 346 case 4:
336 347 sz = BSER_INT32;
337 348 i32 = (int32_t)val;
338 349 iptr = (char*)&i32;
339 350 break;
340 351 case 8:
341 352 sz = BSER_INT64;
342 353 i64 = (int64_t)val;
343 354 iptr = (char*)&i64;
344 355 break;
345 356 default:
346 357 PyErr_SetString(PyExc_RuntimeError, "Cannot represent this long value!?");
347 358 return 0;
348 359 }
349 360
350 361 if (!bser_append(bser, &sz, sizeof(sz))) {
351 362 return 0;
352 363 }
353 364
354 365 return bser_append(bser, iptr, size);
355 366 }
356 367
357 368 static int bser_bytestring(bser_t* bser, PyObject* sval) {
358 369 char* buf = NULL;
359 370 Py_ssize_t len;
360 371 int res;
361 372 PyObject* utf = NULL;
362 373
363 374 if (PyUnicode_Check(sval)) {
364 375 utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore");
365 376 sval = utf;
366 377 }
367 378
368 379 res = PyBytes_AsStringAndSize(sval, &buf, &len);
369 380 if (res == -1) {
370 381 res = 0;
371 382 goto out;
372 383 }
373 384
374 385 if (!bser_append(bser, &bser_bytestring_hdr, sizeof(bser_bytestring_hdr))) {
375 386 res = 0;
376 387 goto out;
377 388 }
378 389
379 390 if (!bser_long(bser, len)) {
380 391 res = 0;
381 392 goto out;
382 393 }
383 394
384 395 if (len > UINT32_MAX) {
385 396 PyErr_Format(PyExc_ValueError, "string too big");
386 397 res = 0;
387 398 goto out;
388 399 }
389 400
390 401 res = bser_append(bser, buf, (uint32_t)len);
391 402
392 403 out:
393 404 if (utf) {
394 405 Py_DECREF(utf);
395 406 }
396 407
397 408 return res;
398 409 }
399 410
400 411 static int bser_recursive(bser_t* bser, PyObject* val) {
401 412 if (PyBool_Check(val)) {
402 413 if (val == Py_True) {
403 414 return bser_append(bser, &bser_true, sizeof(bser_true));
404 415 }
405 416 return bser_append(bser, &bser_false, sizeof(bser_false));
406 417 }
407 418
408 419 if (val == Py_None) {
409 420 return bser_append(bser, &bser_null, sizeof(bser_null));
410 421 }
411 422
412 423 // Python 3 has one integer type.
413 424 #if PY_MAJOR_VERSION < 3
414 425 if (PyInt_Check(val)) {
415 426 return bser_long(bser, PyInt_AS_LONG(val));
416 427 }
417 428 #endif // PY_MAJOR_VERSION < 3
418 429
419 430 if (PyLong_Check(val)) {
420 431 return bser_long(bser, PyLong_AsLongLong(val));
421 432 }
422 433
423 434 if (PyBytes_Check(val) || PyUnicode_Check(val)) {
424 435 return bser_bytestring(bser, val);
425 436 }
426 437
427 438 if (PyFloat_Check(val)) {
428 439 double dval = PyFloat_AS_DOUBLE(val);
429 440 char sz = BSER_REAL;
430 441
431 442 if (!bser_append(bser, &sz, sizeof(sz))) {
432 443 return 0;
433 444 }
434 445
435 446 return bser_append(bser, (char*)&dval, sizeof(dval));
436 447 }
437 448
438 449 if (PyList_Check(val)) {
439 450 Py_ssize_t i, len = PyList_GET_SIZE(val);
440 451
441 452 if (!bser_append(bser, &bser_array_hdr, sizeof(bser_array_hdr))) {
442 453 return 0;
443 454 }
444 455
445 456 if (!bser_long(bser, len)) {
446 457 return 0;
447 458 }
448 459
449 460 for (i = 0; i < len; i++) {
450 461 PyObject* ele = PyList_GET_ITEM(val, i);
451 462
452 463 if (!bser_recursive(bser, ele)) {
453 464 return 0;
454 465 }
455 466 }
456 467
457 468 return 1;
458 469 }
459 470
460 471 if (PyTuple_Check(val)) {
461 472 Py_ssize_t i, len = PyTuple_GET_SIZE(val);
462 473
463 474 if (!bser_append(bser, &bser_array_hdr, sizeof(bser_array_hdr))) {
464 475 return 0;
465 476 }
466 477
467 478 if (!bser_long(bser, len)) {
468 479 return 0;
469 480 }
470 481
471 482 for (i = 0; i < len; i++) {
472 483 PyObject* ele = PyTuple_GET_ITEM(val, i);
473 484
474 485 if (!bser_recursive(bser, ele)) {
475 486 return 0;
476 487 }
477 488 }
478 489
479 490 return 1;
480 491 }
481 492
482 493 if (PyMapping_Check(val)) {
483 494 Py_ssize_t len = PyMapping_Length(val);
484 495 Py_ssize_t pos = 0;
485 496 PyObject *key, *ele;
486 497
487 498 if (!bser_append(bser, &bser_object_hdr, sizeof(bser_object_hdr))) {
488 499 return 0;
489 500 }
490 501
491 502 if (!bser_long(bser, len)) {
492 503 return 0;
493 504 }
494 505
495 506 while (PyDict_Next(val, &pos, &key, &ele)) {
496 507 if (!bser_bytestring(bser, key)) {
497 508 return 0;
498 509 }
499 510 if (!bser_recursive(bser, ele)) {
500 511 return 0;
501 512 }
502 513 }
503 514
504 515 return 1;
505 516 }
506 517
507 518 PyErr_SetString(PyExc_ValueError, "Unsupported value type");
508 519 return 0;
509 520 }
510 521
511 522 static PyObject* bser_dumps(PyObject* self, PyObject* args, PyObject* kw) {
512 523 PyObject *val = NULL, *res;
513 524 bser_t bser;
514 525 uint32_t len, bser_version = 1, bser_capabilities = 0;
515 526
516 527 static char* kw_list[] = {"val", "version", "capabilities", NULL};
517 528
518 529 if (!PyArg_ParseTupleAndKeywords(
519 530 args,
520 531 kw,
521 532 "O|ii:dumps",
522 533 kw_list,
523 534 &val,
524 535 &bser_version,
525 536 &bser_capabilities)) {
526 537 return NULL;
527 538 }
528 539
529 540 if (!bser_init(&bser, bser_version, bser_capabilities)) {
530 541 return PyErr_NoMemory();
531 542 }
532 543
533 544 if (!bser_recursive(&bser, val)) {
534 545 bser_dtor(&bser);
535 546 if (errno == ENOMEM) {
536 547 return PyErr_NoMemory();
537 548 }
538 549 // otherwise, we've already set the error to something reasonable
539 550 return NULL;
540 551 }
541 552
542 553 // Now fill in the overall length
543 554 if (bser_version == 1) {
544 555 len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
545 556 memcpy(bser.buf + 3, &len, sizeof(len));
546 557 } else {
547 558 len = bser.wpos - (sizeof(EMPTY_HEADER_V2) - 1);
548 559 // The BSER capabilities block comes before the PDU length
549 560 memcpy(bser.buf + 2, &bser_capabilities, sizeof(bser_capabilities));
550 561 memcpy(bser.buf + 7, &len, sizeof(len));
551 562 }
552 563
553 564 res = PyBytes_FromStringAndSize(bser.buf, bser.wpos);
554 565 bser_dtor(&bser);
555 566
556 567 return res;
557 568 }
558 569
559 570 int bunser_int(const char** ptr, const char* end, int64_t* val) {
560 571 int needed;
561 572 const char* buf = *ptr;
562 573 int8_t i8;
563 574 int16_t i16;
564 575 int32_t i32;
565 576 int64_t i64;
566 577
567 578 switch (buf[0]) {
568 579 case BSER_INT8:
569 580 needed = 2;
570 581 break;
571 582 case BSER_INT16:
572 583 needed = 3;
573 584 break;
574 585 case BSER_INT32:
575 586 needed = 5;
576 587 break;
577 588 case BSER_INT64:
578 589 needed = 9;
579 590 break;
580 591 default:
581 592 PyErr_Format(
582 593 PyExc_ValueError, "invalid bser int encoding 0x%02x", buf[0]);
583 594 return 0;
584 595 }
585 596 if (end - buf < needed) {
586 597 PyErr_SetString(PyExc_ValueError, "input buffer to small for int encoding");
587 598 return 0;
588 599 }
589 600 *ptr = buf + needed;
590 601 switch (buf[0]) {
591 602 case BSER_INT8:
592 603 memcpy(&i8, buf + 1, sizeof(i8));
593 604 *val = i8;
594 605 return 1;
595 606 case BSER_INT16:
596 607 memcpy(&i16, buf + 1, sizeof(i16));
597 608 *val = i16;
598 609 return 1;
599 610 case BSER_INT32:
600 611 memcpy(&i32, buf + 1, sizeof(i32));
601 612 *val = i32;
602 613 return 1;
603 614 case BSER_INT64:
604 615 memcpy(&i64, buf + 1, sizeof(i64));
605 616 *val = i64;
606 617 return 1;
607 618 default:
608 619 return 0;
609 620 }
610 621 }
611 622
612 623 static int bunser_bytestring(
613 624 const char** ptr,
614 625 const char* end,
615 626 const char** start,
616 627 int64_t* len) {
617 628 const char* buf = *ptr;
618 629
619 630 // skip string marker
620 631 buf++;
621 632 if (!bunser_int(&buf, end, len)) {
622 633 return 0;
623 634 }
624 635
625 636 if (buf + *len > end) {
626 637 PyErr_Format(PyExc_ValueError, "invalid string length in bser data");
627 638 return 0;
628 639 }
629 640
630 641 *ptr = buf + *len;
631 642 *start = buf;
632 643 return 1;
633 644 }
634 645
635 646 static PyObject*
636 647 bunser_array(const char** ptr, const char* end, const unser_ctx_t* ctx) {
637 648 const char* buf = *ptr;
638 649 int64_t nitems, i;
639 650 int mutable = ctx->mutable;
640 651 PyObject* res;
641 652
642 653 // skip array header
643 654 buf++;
644 655 if (!bunser_int(&buf, end, &nitems)) {
645 656 return 0;
646 657 }
647 658 *ptr = buf;
648 659
649 660 if (nitems > LONG_MAX) {
650 661 PyErr_Format(PyExc_ValueError, "too many items for python array");
651 662 return NULL;
652 663 }
653 664
654 665 if (mutable) {
655 666 res = PyList_New((Py_ssize_t)nitems);
656 667 } else {
657 668 res = PyTuple_New((Py_ssize_t)nitems);
658 669 }
659 670
660 671 for (i = 0; i < nitems; i++) {
661 672 PyObject* ele = bser_loads_recursive(ptr, end, ctx);
662 673
663 674 if (!ele) {
664 675 Py_DECREF(res);
665 676 return NULL;
666 677 }
667 678
668 679 if (mutable) {
669 680 PyList_SET_ITEM(res, i, ele);
670 681 } else {
671 682 PyTuple_SET_ITEM(res, i, ele);
672 683 }
673 684 // DECREF(ele) not required as SET_ITEM steals the ref
674 685 }
675 686
676 687 return res;
677 688 }
678 689
679 690 static PyObject*
680 691 bunser_object(const char** ptr, const char* end, const unser_ctx_t* ctx) {
681 692 const char* buf = *ptr;
682 693 int64_t nitems, i;
683 694 int mutable = ctx->mutable;
684 695 PyObject* res;
685 696 bserObject* obj;
686 697
687 698 // skip array header
688 699 buf++;
689 700 if (!bunser_int(&buf, end, &nitems)) {
690 701 return 0;
691 702 }
692 703 *ptr = buf;
693 704
694 705 if (mutable) {
695 706 res = PyDict_New();
696 707 } else {
697 708 obj = PyObject_New(bserObject, &bserObjectType);
698 709 obj->keys = PyTuple_New((Py_ssize_t)nitems);
699 710 obj->values = PyTuple_New((Py_ssize_t)nitems);
700 711 res = (PyObject*)obj;
701 712 }
702 713
703 714 for (i = 0; i < nitems; i++) {
704 715 const char* keystr;
705 716 int64_t keylen;
706 717 PyObject* key;
707 718 PyObject* ele;
708 719
709 720 if (!bunser_bytestring(ptr, end, &keystr, &keylen)) {
710 721 Py_DECREF(res);
711 722 return NULL;
712 723 }
713 724
714 725 if (keylen > LONG_MAX) {
715 726 PyErr_Format(PyExc_ValueError, "string too big for python");
716 727 Py_DECREF(res);
717 728 return NULL;
718 729 }
719 730
720 731 if (mutable) {
721 732 // This will interpret the key as UTF-8.
722 733 key = PyUnicode_FromStringAndSize(keystr, (Py_ssize_t)keylen);
723 734 } else {
724 735 // For immutable objects we'll manage key lookups, so we can avoid going
725 736 // through the Unicode APIs. This avoids a potentially expensive and
726 737 // definitely unnecessary conversion to UTF-16 and back for Python 2.
727 738 // TODO: On Python 3 the Unicode APIs are smarter: we might be able to use
728 739 // Unicode keys there without an appreciable performance loss.
729 740 key = PyBytes_FromStringAndSize(keystr, (Py_ssize_t)keylen);
730 741 }
731 742
732 743 if (!key) {
733 744 Py_DECREF(res);
734 745 return NULL;
735 746 }
736 747
737 748 ele = bser_loads_recursive(ptr, end, ctx);
738 749
739 750 if (!ele) {
740 751 Py_DECREF(key);
741 752 Py_DECREF(res);
742 753 return NULL;
743 754 }
744 755
745 756 if (mutable) {
746 757 PyDict_SetItem(res, key, ele);
747 758 Py_DECREF(key);
748 759 Py_DECREF(ele);
749 760 } else {
750 761 /* PyTuple_SET_ITEM steals ele, key */
751 762 PyTuple_SET_ITEM(obj->values, i, ele);
752 763 PyTuple_SET_ITEM(obj->keys, i, key);
753 764 }
754 765 }
755 766
756 767 return res;
757 768 }
758 769
759 770 static PyObject*
760 771 bunser_template(const char** ptr, const char* end, const unser_ctx_t* ctx) {
761 772 const char* buf = *ptr;
762 773 int64_t nitems, i;
763 774 int mutable = ctx->mutable;
764 775 PyObject* arrval;
765 776 PyObject* keys;
766 777 Py_ssize_t numkeys, keyidx;
767 778 unser_ctx_t keys_ctx = {0};
768 779 if (mutable) {
769 780 keys_ctx.mutable = 1;
770 781 // Decode keys as UTF-8 in this case.
771 782 keys_ctx.value_encoding = "utf-8";
772 783 keys_ctx.value_errors = "strict";
773 784 } else {
774 785 // Treat keys as bytestrings in this case -- we'll do Unicode conversions at
775 786 // lookup time.
776 787 }
777 788
778 789 if (buf[1] != BSER_ARRAY) {
779 790 PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE");
780 791 return NULL;
781 792 }
782 793
783 794 // skip header
784 795 buf++;
785 796 *ptr = buf;
786 797
787 798 // Load template keys.
788 799 // For keys we don't want to do any decoding right now.
789 800 keys = bunser_array(ptr, end, &keys_ctx);
790 801 if (!keys) {
791 802 return NULL;
792 803 }
793 804
794 805 numkeys = PySequence_Length(keys);
795 806
796 807 // Load number of array elements
797 808 if (!bunser_int(ptr, end, &nitems)) {
798 809 Py_DECREF(keys);
799 810 return 0;
800 811 }
801 812
802 813 if (nitems > LONG_MAX) {
803 814 PyErr_Format(PyExc_ValueError, "Too many items for python");
804 815 Py_DECREF(keys);
805 816 return NULL;
806 817 }
807 818
808 819 arrval = PyList_New((Py_ssize_t)nitems);
809 820 if (!arrval) {
810 821 Py_DECREF(keys);
811 822 return NULL;
812 823 }
813 824
814 825 for (i = 0; i < nitems; i++) {
815 826 PyObject* dict = NULL;
816 827 bserObject* obj = NULL;
817 828
818 829 if (mutable) {
819 830 dict = PyDict_New();
820 831 } else {
821 832 obj = PyObject_New(bserObject, &bserObjectType);
822 833 if (obj) {
823 834 obj->keys = keys;
824 835 Py_INCREF(obj->keys);
825 836 obj->values = PyTuple_New(numkeys);
826 837 }
827 838 dict = (PyObject*)obj;
828 839 }
829 840 if (!dict) {
830 841 fail:
831 842 Py_DECREF(keys);
832 843 Py_DECREF(arrval);
833 844 return NULL;
834 845 }
835 846
836 847 for (keyidx = 0; keyidx < numkeys; keyidx++) {
837 848 PyObject* key;
838 849 PyObject* ele;
839 850
840 851 if (**ptr == BSER_SKIP) {
841 852 *ptr = *ptr + 1;
842 853 ele = Py_None;
843 854 Py_INCREF(ele);
844 855 } else {
845 856 ele = bser_loads_recursive(ptr, end, ctx);
846 857 }
847 858
848 859 if (!ele) {
849 860 goto fail;
850 861 }
851 862
852 863 if (mutable) {
853 864 key = PyList_GET_ITEM(keys, keyidx);
854 865 PyDict_SetItem(dict, key, ele);
855 866 Py_DECREF(ele);
856 867 } else {
857 868 PyTuple_SET_ITEM(obj->values, keyidx, ele);
858 869 // DECREF(ele) not required as SET_ITEM steals the ref
859 870 }
860 871 }
861 872
862 873 PyList_SET_ITEM(arrval, i, dict);
863 874 // DECREF(obj) not required as SET_ITEM steals the ref
864 875 }
865 876
866 877 Py_DECREF(keys);
867 878
868 879 return arrval;
869 880 }
870 881
871 882 static PyObject* bser_loads_recursive(
872 883 const char** ptr,
873 884 const char* end,
874 885 const unser_ctx_t* ctx) {
875 886 const char* buf = *ptr;
876 887
877 888 switch (buf[0]) {
878 889 case BSER_INT8:
879 890 case BSER_INT16:
880 891 case BSER_INT32:
881 892 case BSER_INT64: {
882 893 int64_t ival;
883 894 if (!bunser_int(ptr, end, &ival)) {
884 895 return NULL;
885 896 }
886 897 // Python 3 has one integer type.
887 898 #if PY_MAJOR_VERSION >= 3
888 899 return PyLong_FromLongLong(ival);
889 900 #else
890 901 if (ival < LONG_MIN || ival > LONG_MAX) {
891 902 return PyLong_FromLongLong(ival);
892 903 }
893 904 return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
894 905 #endif // PY_MAJOR_VERSION >= 3
895 906 }
896 907
897 908 case BSER_REAL: {
898 909 double dval;
899 910 memcpy(&dval, buf + 1, sizeof(dval));
900 911 *ptr = buf + 1 + sizeof(double);
901 912 return PyFloat_FromDouble(dval);
902 913 }
903 914
904 915 case BSER_TRUE:
905 916 *ptr = buf + 1;
906 917 Py_INCREF(Py_True);
907 918 return Py_True;
908 919
909 920 case BSER_FALSE:
910 921 *ptr = buf + 1;
911 922 Py_INCREF(Py_False);
912 923 return Py_False;
913 924
914 925 case BSER_NULL:
915 926 *ptr = buf + 1;
916 927 Py_INCREF(Py_None);
917 928 return Py_None;
918 929
919 930 case BSER_BYTESTRING: {
920 931 const char* start;
921 932 int64_t len;
922 933
923 934 if (!bunser_bytestring(ptr, end, &start, &len)) {
924 935 return NULL;
925 936 }
926 937
927 938 if (len > LONG_MAX) {
928 939 PyErr_Format(PyExc_ValueError, "string too long for python");
929 940 return NULL;
930 941 }
931 942
932 943 if (ctx->value_encoding != NULL) {
933 944 return PyUnicode_Decode(
934 945 start, (long)len, ctx->value_encoding, ctx->value_errors);
935 946 } else {
936 947 return PyBytes_FromStringAndSize(start, (long)len);
937 948 }
938 949 }
939 950
940 951 case BSER_UTF8STRING: {
941 952 const char* start;
942 953 int64_t len;
943 954
944 955 if (!bunser_bytestring(ptr, end, &start, &len)) {
945 956 return NULL;
946 957 }
947 958
948 959 if (len > LONG_MAX) {
949 960 PyErr_Format(PyExc_ValueError, "string too long for python");
950 961 return NULL;
951 962 }
952 963
953 964 return PyUnicode_Decode(start, (long)len, "utf-8", "strict");
954 965 }
955 966
956 967 case BSER_ARRAY:
957 968 return bunser_array(ptr, end, ctx);
958 969
959 970 case BSER_OBJECT:
960 971 return bunser_object(ptr, end, ctx);
961 972
962 973 case BSER_TEMPLATE:
963 974 return bunser_template(ptr, end, ctx);
964 975
965 976 default:
966 977 PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]);
967 978 }
968 979
969 980 return NULL;
970 981 }
971 982
972 983 static int _pdu_info_helper(
973 984 const char* data,
974 985 const char* end,
975 986 uint32_t* bser_version_out,
976 987 uint32_t* bser_capabilities_out,
977 988 int64_t* expected_len_out,
978 989 off_t* position_out) {
979 990 uint32_t bser_version;
980 991 uint32_t bser_capabilities = 0;
981 992 int64_t expected_len;
982 993
983 994 const char* start;
984 995 start = data;
985 996 // Validate the header and length
986 997 if (memcmp(data, EMPTY_HEADER, 2) == 0) {
987 998 bser_version = 1;
988 999 } else if (memcmp(data, EMPTY_HEADER_V2, 2) == 0) {
989 1000 bser_version = 2;
990 1001 } else {
991 1002 PyErr_SetString(PyExc_ValueError, "invalid bser header");
992 1003 return 0;
993 1004 }
994 1005
995 1006 data += 2;
996 1007
997 1008 if (bser_version == 2) {
998 1009 // Expect an integer telling us what capabilities are supported by the
999 1010 // remote server (currently unused).
1000 1011 if (!memcpy(&bser_capabilities, &data, sizeof(bser_capabilities))) {
1001 1012 return 0;
1002 1013 }
1003 1014 data += sizeof(bser_capabilities);
1004 1015 }
1005 1016
1006 1017 // Expect an integer telling us how big the rest of the data
1007 1018 // should be
1008 1019 if (!bunser_int(&data, end, &expected_len)) {
1009 1020 return 0;
1010 1021 }
1011 1022
1012 1023 *bser_version_out = bser_version;
1013 1024 *bser_capabilities_out = (uint32_t)bser_capabilities;
1014 1025 *expected_len_out = expected_len;
1015 1026 *position_out = (off_t)(data - start);
1016 1027 return 1;
1017 1028 }
1018 1029
1019 1030 // This function parses the PDU header and provides info about the packet
1020 1031 // Returns false if unsuccessful
1021 1032 static int pdu_info_helper(
1022 1033 PyObject* self,
1023 1034 PyObject* args,
1024 1035 uint32_t* bser_version_out,
1025 1036 uint32_t* bser_capabilities_out,
1026 1037 int64_t* total_len_out) {
1027 1038 const char* start = NULL;
1028 1039 const char* data = NULL;
1029 1040 int datalen = 0;
1030 1041 const char* end;
1031 1042 int64_t expected_len;
1032 1043 off_t position;
1033 1044
1034 1045 if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
1035 1046 return 0;
1036 1047 }
1037 1048 data = start;
1038 1049 end = data + datalen;
1039 1050
1040 1051 if (!_pdu_info_helper(
1041 1052 data,
1042 1053 end,
1043 1054 bser_version_out,
1044 1055 bser_capabilities_out,
1045 1056 &expected_len,
1046 1057 &position)) {
1047 1058 return 0;
1048 1059 }
1049 1060 *total_len_out = (int64_t)(expected_len + position);
1050 1061 return 1;
1051 1062 }
1052 1063
1053 1064 // Expected use case is to read a packet from the socket and then call
1054 1065 // bser.pdu_info on the packet. It returns the BSER version, BSER capabilities,
1055 1066 // and the total length of the entire response that the peer is sending,
1056 1067 // including the bytes already received. This allows the client to compute the
1057 1068 // data size it needs to read before it can decode the data.
1058 1069 static PyObject* bser_pdu_info(PyObject* self, PyObject* args) {
1059 1070 uint32_t version, capabilities;
1060 1071 int64_t total_len;
1061 1072 if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
1062 1073 return NULL;
1063 1074 }
1064 1075 return Py_BuildValue("kkL", version, capabilities, total_len);
1065 1076 }
1066 1077
1067 1078 static PyObject* bser_pdu_len(PyObject* self, PyObject* args) {
1068 1079 uint32_t version, capabilities;
1069 1080 int64_t total_len;
1070 1081 if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
1071 1082 return NULL;
1072 1083 }
1073 1084 return Py_BuildValue("L", total_len);
1074 1085 }
1075 1086
1076 1087 static PyObject* bser_loads(PyObject* self, PyObject* args, PyObject* kw) {
1077 1088 const char* data = NULL;
1078 1089 int datalen = 0;
1079 1090 const char* start;
1080 1091 const char* end;
1081 1092 int64_t expected_len;
1082 1093 off_t position;
1083 1094 PyObject* mutable_obj = NULL;
1084 1095 const char* value_encoding = NULL;
1085 1096 const char* value_errors = NULL;
1086 1097 unser_ctx_t ctx = {1, 0};
1087 1098
1088 1099 static char* kw_list[] = {
1089 1100 "buf", "mutable", "value_encoding", "value_errors", NULL};
1090 1101
1091 1102 if (!PyArg_ParseTupleAndKeywords(
1092 1103 args,
1093 1104 kw,
1094 1105 "s#|Ozz:loads",
1095 1106 kw_list,
1096 1107 &start,
1097 1108 &datalen,
1098 1109 &mutable_obj,
1099 1110 &value_encoding,
1100 1111 &value_errors)) {
1101 1112 return NULL;
1102 1113 }
1103 1114
1104 1115 if (mutable_obj) {
1105 1116 ctx.mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
1106 1117 }
1107 1118 ctx.value_encoding = value_encoding;
1108 1119 if (value_encoding == NULL) {
1109 1120 ctx.value_errors = NULL;
1110 1121 } else if (value_errors == NULL) {
1111 1122 ctx.value_errors = "strict";
1112 1123 } else {
1113 1124 ctx.value_errors = value_errors;
1114 1125 }
1115 1126 data = start;
1116 1127 end = data + datalen;
1117 1128
1118 1129 if (!_pdu_info_helper(
1119 1130 data,
1120 1131 end,
1121 1132 &ctx.bser_version,
1122 1133 &ctx.bser_capabilities,
1123 1134 &expected_len,
1124 1135 &position)) {
1125 1136 return NULL;
1126 1137 }
1127 1138
1128 1139 data = start + position;
1129 1140 // Verify
1130 1141 if (expected_len + data != end) {
1131 1142 PyErr_SetString(PyExc_ValueError, "bser data len != header len");
1132 1143 return NULL;
1133 1144 }
1134 1145
1135 1146 return bser_loads_recursive(&data, end, &ctx);
1136 1147 }
1137 1148
1138 1149 static PyObject* bser_load(PyObject* self, PyObject* args, PyObject* kw) {
1139 1150 PyObject *load, *string;
1140 1151 PyObject* fp = NULL;
1141 1152 PyObject* mutable_obj = NULL;
1142 1153 const char* value_encoding = NULL;
1143 1154 const char* value_errors = NULL;
1144 1155
1145 1156 static char* kw_list[] = {
1146 1157 "fp", "mutable", "value_encoding", "value_errors", NULL};
1147 1158
1148 1159 if (!PyArg_ParseTupleAndKeywords(
1149 1160 args,
1150 1161 kw,
1151 1162 "OOzz:load",
1152 1163 kw_list,
1153 1164 &fp,
1154 1165 &mutable_obj,
1155 1166 &value_encoding,
1156 1167 &value_errors)) {
1157 1168 return NULL;
1158 1169 }
1159 1170
1160 1171 load = PyImport_ImportModule("pywatchman.load");
1161 1172 if (load == NULL) {
1162 1173 return NULL;
1163 1174 }
1164 1175 string = PyObject_CallMethod(
1165 1176 load, "load", "OOzz", fp, mutable_obj, value_encoding, value_errors);
1166 1177 Py_DECREF(load);
1167 1178 return string;
1168 1179 }
1169 1180
1170 1181 // clang-format off
1171 1182 static PyMethodDef bser_methods[] = {
1172 1183 {"loads", (PyCFunction)bser_loads, METH_VARARGS | METH_KEYWORDS,
1173 1184 "Deserialize string."},
1174 1185 {"load", (PyCFunction)bser_load, METH_VARARGS | METH_KEYWORDS,
1175 1186 "Deserialize a file object"},
1176 1187 {"pdu_info", (PyCFunction)bser_pdu_info, METH_VARARGS,
1177 1188 "Extract PDU information."},
1178 1189 {"pdu_len", (PyCFunction)bser_pdu_len, METH_VARARGS,
1179 1190 "Extract total PDU length."},
1180 1191 {"dumps", (PyCFunction)bser_dumps, METH_VARARGS | METH_KEYWORDS,
1181 1192 "Serialize string."},
1182 1193 {NULL, NULL, 0, NULL}
1183 1194 };
1184 1195
1185 1196 #if PY_MAJOR_VERSION >= 3
1186 1197 static struct PyModuleDef bser_module = {
1187 1198 PyModuleDef_HEAD_INIT,
1188 1199 "bser",
1189 1200 "Efficient encoding and decoding of BSER.",
1190 1201 -1,
1191 1202 bser_methods
1192 1203 };
1193 1204 // clang-format on
1194 1205
1195 1206 PyMODINIT_FUNC PyInit_bser(void) {
1196 1207 PyObject* mod;
1197 1208
1198 1209 mod = PyModule_Create(&bser_module);
1199 1210 PyType_Ready(&bserObjectType);
1200 1211
1201 1212 return mod;
1202 1213 }
1203 1214 #else
1204 1215
1205 1216 PyMODINIT_FUNC initbser(void) {
1206 1217 (void)Py_InitModule("bser", bser_methods);
1207 1218 PyType_Ready(&bserObjectType);
1208 1219 }
1209 1220 #endif // PY_MAJOR_VERSION >= 3
1210 1221
1211 1222 /* vim:ts=2:sw=2:et:
1212 1223 */
General Comments 0
You need to be logged in to leave comments. Login now