##// END OF EJS Templates
parsers: fix list sizing rounding error (SEC)...
Matt Mackall -
r28656:b6ed2505 stable
parent child Browse files
Show More
@@ -0,0 +1,15 b''
1 Test for CVE-2016-3630
2
3 $ hg init
4
5 >>> open("a.i", "w").write(
6 ... """eJxjYGZgZIAAYQYGxhgom+k/FMx8YKx9ZUaKSOyqo4cnuKb8mbqHV5cBCVTMWb1Cwqkhe4Gsg9AD
7 ... Joa3dYtcYYYBAQ8Qr4OqZAYRICPTSr5WKd/42rV36d+8/VmrNpv7NP1jQAXrQE4BqQUARngwVA=="""
8 ... .decode("base64").decode("zlib"))
9
10 $ hg debugindex a.i
11 rev offset length delta linkrev nodeid p1 p2
12 0 0 19 -1 2 99e0332bd498 000000000000 000000000000
13 1 19 12 0 3 6674f57a23d8 99e0332bd498 000000000000
14 $ hg debugdata a.i 1 2>&1 | grep decoded
15 mpatch.mpatchError: patch cannot be decoded
@@ -1,420 +1,420 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #define PY_SSIZE_T_CLEAN
24 24 #include <Python.h>
25 25 #include <stdlib.h>
26 26 #include <string.h>
27 27
28 28 #include "util.h"
29 29
30 30 static char mpatch_doc[] = "Efficient binary patching.";
31 31 static PyObject *mpatch_Error;
32 32
33 33 struct frag {
34 34 int start, end, len;
35 35 const char *data;
36 36 };
37 37
38 38 struct flist {
39 39 struct frag *base, *head, *tail;
40 40 };
41 41
42 42 static struct flist *lalloc(Py_ssize_t size)
43 43 {
44 44 struct flist *a = NULL;
45 45
46 46 if (size < 1)
47 47 size = 1;
48 48
49 49 a = (struct flist *)malloc(sizeof(struct flist));
50 50 if (a) {
51 51 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
52 52 if (a->base) {
53 53 a->head = a->tail = a->base;
54 54 return a;
55 55 }
56 56 free(a);
57 57 a = NULL;
58 58 }
59 59 if (!PyErr_Occurred())
60 60 PyErr_NoMemory();
61 61 return NULL;
62 62 }
63 63
64 64 static void lfree(struct flist *a)
65 65 {
66 66 if (a) {
67 67 free(a->base);
68 68 free(a);
69 69 }
70 70 }
71 71
72 72 static Py_ssize_t lsize(struct flist *a)
73 73 {
74 74 return a->tail - a->head;
75 75 }
76 76
77 77 /* move hunks in source that are less cut to dest, compensating
78 78 for changes in offset. the last hunk may be split if necessary.
79 79 */
80 80 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
81 81 {
82 82 struct frag *d = dest->tail, *s = src->head;
83 83 int postend, c, l;
84 84
85 85 while (s != src->tail) {
86 86 if (s->start + offset >= cut)
87 87 break; /* we've gone far enough */
88 88
89 89 postend = offset + s->start + s->len;
90 90 if (postend <= cut) {
91 91 /* save this hunk */
92 92 offset += s->start + s->len - s->end;
93 93 *d++ = *s++;
94 94 }
95 95 else {
96 96 /* break up this hunk */
97 97 c = cut - offset;
98 98 if (s->end < c)
99 99 c = s->end;
100 100 l = cut - offset - s->start;
101 101 if (s->len < l)
102 102 l = s->len;
103 103
104 104 offset += s->start + l - c;
105 105
106 106 d->start = s->start;
107 107 d->end = c;
108 108 d->len = l;
109 109 d->data = s->data;
110 110 d++;
111 111 s->start = c;
112 112 s->len = s->len - l;
113 113 s->data = s->data + l;
114 114
115 115 break;
116 116 }
117 117 }
118 118
119 119 dest->tail = d;
120 120 src->head = s;
121 121 return offset;
122 122 }
123 123
124 124 /* like gather, but with no output list */
125 125 static int discard(struct flist *src, int cut, int offset)
126 126 {
127 127 struct frag *s = src->head;
128 128 int postend, c, l;
129 129
130 130 while (s != src->tail) {
131 131 if (s->start + offset >= cut)
132 132 break;
133 133
134 134 postend = offset + s->start + s->len;
135 135 if (postend <= cut) {
136 136 offset += s->start + s->len - s->end;
137 137 s++;
138 138 }
139 139 else {
140 140 c = cut - offset;
141 141 if (s->end < c)
142 142 c = s->end;
143 143 l = cut - offset - s->start;
144 144 if (s->len < l)
145 145 l = s->len;
146 146
147 147 offset += s->start + l - c;
148 148 s->start = c;
149 149 s->len = s->len - l;
150 150 s->data = s->data + l;
151 151
152 152 break;
153 153 }
154 154 }
155 155
156 156 src->head = s;
157 157 return offset;
158 158 }
159 159
160 160 /* combine hunk lists a and b, while adjusting b for offset changes in a/
161 161 this deletes a and b and returns the resultant list. */
162 162 static struct flist *combine(struct flist *a, struct flist *b)
163 163 {
164 164 struct flist *c = NULL;
165 165 struct frag *bh, *ct;
166 166 int offset = 0, post;
167 167
168 168 if (a && b)
169 169 c = lalloc((lsize(a) + lsize(b)) * 2);
170 170
171 171 if (c) {
172 172
173 173 for (bh = b->head; bh != b->tail; bh++) {
174 174 /* save old hunks */
175 175 offset = gather(c, a, bh->start, offset);
176 176
177 177 /* discard replaced hunks */
178 178 post = discard(a, bh->end, offset);
179 179
180 180 /* insert new hunk */
181 181 ct = c->tail;
182 182 ct->start = bh->start - offset;
183 183 ct->end = bh->end - post;
184 184 ct->len = bh->len;
185 185 ct->data = bh->data;
186 186 c->tail++;
187 187 offset = post;
188 188 }
189 189
190 190 /* hold on to tail from a */
191 191 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
192 192 c->tail += lsize(a);
193 193 }
194 194
195 195 lfree(a);
196 196 lfree(b);
197 197 return c;
198 198 }
199 199
200 200 /* decode a binary patch into a hunk list */
201 201 static struct flist *decode(const char *bin, Py_ssize_t len)
202 202 {
203 203 struct flist *l;
204 204 struct frag *lt;
205 205 int pos = 0;
206 206
207 207 /* assume worst case size, we won't have many of these lists */
208 l = lalloc(len / 12);
208 l = lalloc(len / 12 + 1);
209 209 if (!l)
210 210 return NULL;
211 211
212 212 lt = l->tail;
213 213
214 214 while (pos >= 0 && pos < len) {
215 215 lt->start = getbe32(bin + pos);
216 216 lt->end = getbe32(bin + pos + 4);
217 217 lt->len = getbe32(bin + pos + 8);
218 218 if (lt->start > lt->end)
219 219 break; /* sanity check */
220 220 lt->data = bin + pos + 12;
221 221 pos += 12 + lt->len;
222 222 lt++;
223 223 }
224 224
225 225 if (pos != len) {
226 226 if (!PyErr_Occurred())
227 227 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
228 228 lfree(l);
229 229 return NULL;
230 230 }
231 231
232 232 l->tail = lt;
233 233 return l;
234 234 }
235 235
236 236 /* calculate the size of resultant text */
237 237 static Py_ssize_t calcsize(Py_ssize_t len, struct flist *l)
238 238 {
239 239 Py_ssize_t outlen = 0, last = 0;
240 240 struct frag *f = l->head;
241 241
242 242 while (f != l->tail) {
243 243 if (f->start < last || f->end > len) {
244 244 if (!PyErr_Occurred())
245 245 PyErr_SetString(mpatch_Error,
246 246 "invalid patch");
247 247 return -1;
248 248 }
249 249 outlen += f->start - last;
250 250 last = f->end;
251 251 outlen += f->len;
252 252 f++;
253 253 }
254 254
255 255 outlen += len - last;
256 256 return outlen;
257 257 }
258 258
259 259 static int apply(char *buf, const char *orig, Py_ssize_t len, struct flist *l)
260 260 {
261 261 struct frag *f = l->head;
262 262 int last = 0;
263 263 char *p = buf;
264 264
265 265 while (f != l->tail) {
266 266 if (f->start < last || f->end > len) {
267 267 if (!PyErr_Occurred())
268 268 PyErr_SetString(mpatch_Error,
269 269 "invalid patch");
270 270 return 0;
271 271 }
272 272 memcpy(p, orig + last, f->start - last);
273 273 p += f->start - last;
274 274 memcpy(p, f->data, f->len);
275 275 last = f->end;
276 276 p += f->len;
277 277 f++;
278 278 }
279 279 memcpy(p, orig + last, len - last);
280 280 return 1;
281 281 }
282 282
283 283 /* recursively generate a patch of all bins between start and end */
284 284 static struct flist *fold(PyObject *bins, Py_ssize_t start, Py_ssize_t end)
285 285 {
286 286 Py_ssize_t len, blen;
287 287 const char *buffer;
288 288
289 289 if (start + 1 == end) {
290 290 /* trivial case, output a decoded list */
291 291 PyObject *tmp = PyList_GetItem(bins, start);
292 292 if (!tmp)
293 293 return NULL;
294 294 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
295 295 return NULL;
296 296 return decode(buffer, blen);
297 297 }
298 298
299 299 /* divide and conquer, memory management is elsewhere */
300 300 len = (end - start) / 2;
301 301 return combine(fold(bins, start, start + len),
302 302 fold(bins, start + len, end));
303 303 }
304 304
305 305 static PyObject *
306 306 patches(PyObject *self, PyObject *args)
307 307 {
308 308 PyObject *text, *bins, *result;
309 309 struct flist *patch;
310 310 const char *in;
311 311 char *out;
312 312 Py_ssize_t len, outlen, inlen;
313 313
314 314 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
315 315 return NULL;
316 316
317 317 len = PyList_Size(bins);
318 318 if (!len) {
319 319 /* nothing to do */
320 320 Py_INCREF(text);
321 321 return text;
322 322 }
323 323
324 324 if (PyObject_AsCharBuffer(text, &in, &inlen))
325 325 return NULL;
326 326
327 327 patch = fold(bins, 0, len);
328 328 if (!patch)
329 329 return NULL;
330 330
331 331 outlen = calcsize(inlen, patch);
332 332 if (outlen < 0) {
333 333 result = NULL;
334 334 goto cleanup;
335 335 }
336 336 result = PyBytes_FromStringAndSize(NULL, outlen);
337 337 if (!result) {
338 338 result = NULL;
339 339 goto cleanup;
340 340 }
341 341 out = PyBytes_AsString(result);
342 342 if (!apply(out, in, inlen, patch)) {
343 343 Py_DECREF(result);
344 344 result = NULL;
345 345 }
346 346 cleanup:
347 347 lfree(patch);
348 348 return result;
349 349 }
350 350
351 351 /* calculate size of a patched file directly */
352 352 static PyObject *
353 353 patchedsize(PyObject *self, PyObject *args)
354 354 {
355 355 long orig, start, end, len, outlen = 0, last = 0, pos = 0;
356 356 Py_ssize_t patchlen;
357 357 char *bin;
358 358
359 359 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
360 360 return NULL;
361 361
362 362 while (pos >= 0 && pos < patchlen) {
363 363 start = getbe32(bin + pos);
364 364 end = getbe32(bin + pos + 4);
365 365 len = getbe32(bin + pos + 8);
366 366 if (start > end)
367 367 break; /* sanity check */
368 368 pos += 12 + len;
369 369 outlen += start - last;
370 370 last = end;
371 371 outlen += len;
372 372 }
373 373
374 374 if (pos != patchlen) {
375 375 if (!PyErr_Occurred())
376 376 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
377 377 return NULL;
378 378 }
379 379
380 380 outlen += orig - last;
381 381 return Py_BuildValue("l", outlen);
382 382 }
383 383
384 384 static PyMethodDef methods[] = {
385 385 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
386 386 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
387 387 {NULL, NULL}
388 388 };
389 389
390 390 #ifdef IS_PY3K
391 391 static struct PyModuleDef mpatch_module = {
392 392 PyModuleDef_HEAD_INIT,
393 393 "mpatch",
394 394 mpatch_doc,
395 395 -1,
396 396 methods
397 397 };
398 398
399 399 PyMODINIT_FUNC PyInit_mpatch(void)
400 400 {
401 401 PyObject *m;
402 402
403 403 m = PyModule_Create(&mpatch_module);
404 404 if (m == NULL)
405 405 return NULL;
406 406
407 407 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
408 408 Py_INCREF(mpatch_Error);
409 409 PyModule_AddObject(m, "mpatchError", mpatch_Error);
410 410
411 411 return m;
412 412 }
413 413 #else
414 414 PyMODINIT_FUNC
415 415 initmpatch(void)
416 416 {
417 417 Py_InitModule3("mpatch", methods, mpatch_doc);
418 418 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
419 419 }
420 420 #endif
General Comments 0
You need to be logged in to leave comments. Login now