##// END OF EJS Templates
util.h: unify some common platform tweaks
Matt Mackall -
r16385:e501f45b default
parent child Browse files
Show More
@@ -1,499 +1,467 b''
1 1 /*
2 2 bdiff.c - efficient binary diff extension for Mercurial
3 3
4 4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Based roughly on Python difflib
10 10 */
11 11
12 12 #include <Python.h>
13 13 #include <stdlib.h>
14 14 #include <string.h>
15 15 #include <limits.h>
16 16
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX
18 #define inline
19 #endif
20
21 #ifdef __linux
22 #define inline __inline
23 #endif
24
25 #ifdef _WIN32
26 #ifdef _MSC_VER
27 #define inline __inline
28 typedef unsigned long uint32_t;
29 #else
30 #include <stdint.h>
31 #endif
32 static uint32_t htonl(uint32_t x)
33 {
34 return ((x & 0x000000ffUL) << 24) |
35 ((x & 0x0000ff00UL) << 8) |
36 ((x & 0x00ff0000UL) >> 8) |
37 ((x & 0xff000000UL) >> 24);
38 }
39 #else
40 #include <sys/types.h>
41 #if defined __BEOS__ && !defined __HAIKU__
42 #include <ByteOrder.h>
43 #else
44 #include <arpa/inet.h>
45 #endif
46 #include <inttypes.h>
47 #endif
48
49 17 #include "util.h"
50 18
51 19 struct line {
52 20 int hash, len, n, e;
53 21 const char *l;
54 22 };
55 23
56 24 struct pos {
57 25 int pos, len;
58 26 };
59 27
60 28 struct hunk;
61 29 struct hunk {
62 30 int a1, a2, b1, b2;
63 31 struct hunk *next;
64 32 };
65 33
66 34 static int splitlines(const char *a, int len, struct line **lr)
67 35 {
68 36 unsigned hash;
69 37 int i;
70 38 const char *p, *b = a;
71 39 const char * const plast = a + len - 1;
72 40 struct line *l;
73 41
74 42 /* count the lines */
75 43 i = 1; /* extra line for sentinel */
76 44 for (p = a; p < a + len; p++)
77 45 if (*p == '\n' || p == plast)
78 46 i++;
79 47
80 48 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
81 49 if (!l)
82 50 return -1;
83 51
84 52 /* build the line array and calculate hashes */
85 53 hash = 0;
86 54 for (p = a; p < a + len; p++) {
87 55 /* Leonid Yuriev's hash */
88 56 hash = (hash * 1664525) + (unsigned char)*p + 1013904223;
89 57
90 58 if (*p == '\n' || p == plast) {
91 59 l->hash = hash;
92 60 hash = 0;
93 61 l->len = p - b + 1;
94 62 l->l = b;
95 63 l->n = INT_MAX;
96 64 l++;
97 65 b = p + 1;
98 66 }
99 67 }
100 68
101 69 /* set up a sentinel */
102 70 l->hash = 0;
103 71 l->len = 0;
104 72 l->l = a + len;
105 73 return i - 1;
106 74 }
107 75
108 76 static inline int cmp(struct line *a, struct line *b)
109 77 {
110 78 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
111 79 }
112 80
113 81 static int equatelines(struct line *a, int an, struct line *b, int bn)
114 82 {
115 83 int i, j, buckets = 1, t, scale;
116 84 struct pos *h = NULL;
117 85
118 86 /* build a hash table of the next highest power of 2 */
119 87 while (buckets < bn + 1)
120 88 buckets *= 2;
121 89
122 90 /* try to allocate a large hash table to avoid collisions */
123 91 for (scale = 4; scale; scale /= 2) {
124 92 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
125 93 if (h)
126 94 break;
127 95 }
128 96
129 97 if (!h)
130 98 return 0;
131 99
132 100 buckets = buckets * scale - 1;
133 101
134 102 /* clear the hash table */
135 103 for (i = 0; i <= buckets; i++) {
136 104 h[i].pos = INT_MAX;
137 105 h[i].len = 0;
138 106 }
139 107
140 108 /* add lines to the hash table chains */
141 109 for (i = bn - 1; i >= 0; i--) {
142 110 /* find the equivalence class */
143 111 for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
144 112 j = (j + 1) & buckets)
145 113 if (!cmp(b + i, b + h[j].pos))
146 114 break;
147 115
148 116 /* add to the head of the equivalence class */
149 117 b[i].n = h[j].pos;
150 118 b[i].e = j;
151 119 h[j].pos = i;
152 120 h[j].len++; /* keep track of popularity */
153 121 }
154 122
155 123 /* compute popularity threshold */
156 124 t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
157 125
158 126 /* match items in a to their equivalence class in b */
159 127 for (i = 0; i < an; i++) {
160 128 /* find the equivalence class */
161 129 for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
162 130 j = (j + 1) & buckets)
163 131 if (!cmp(a + i, b + h[j].pos))
164 132 break;
165 133
166 134 a[i].e = j; /* use equivalence class for quick compare */
167 135 if (h[j].len <= t)
168 136 a[i].n = h[j].pos; /* point to head of match list */
169 137 else
170 138 a[i].n = INT_MAX; /* too popular */
171 139 }
172 140
173 141 /* discard hash tables */
174 142 free(h);
175 143 return 1;
176 144 }
177 145
178 146 static int longest_match(struct line *a, struct line *b, struct pos *pos,
179 147 int a1, int a2, int b1, int b2, int *omi, int *omj)
180 148 {
181 149 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
182 150
183 151 for (i = a1; i < a2; i++) {
184 152 /* skip things before the current block */
185 153 for (j = a[i].n; j < b1; j = b[j].n)
186 154 ;
187 155
188 156 /* loop through all lines match a[i] in b */
189 157 for (; j < b2; j = b[j].n) {
190 158 /* does this extend an earlier match? */
191 159 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
192 160 k = pos[j - 1].len + 1;
193 161 else
194 162 k = 1;
195 163 pos[j].pos = i;
196 164 pos[j].len = k;
197 165
198 166 /* best match so far? */
199 167 if (k > mk) {
200 168 mi = i;
201 169 mj = j;
202 170 mk = k;
203 171 }
204 172 }
205 173 }
206 174
207 175 if (mk) {
208 176 mi = mi - mk + 1;
209 177 mj = mj - mk + 1;
210 178 }
211 179
212 180 /* expand match to include neighboring popular lines */
213 181 while (mi - mb > a1 && mj - mb > b1 &&
214 182 a[mi - mb - 1].e == b[mj - mb - 1].e)
215 183 mb++;
216 184 while (mi + mk < a2 && mj + mk < b2 &&
217 185 a[mi + mk].e == b[mj + mk].e)
218 186 mk++;
219 187
220 188 *omi = mi - mb;
221 189 *omj = mj - mb;
222 190
223 191 return mk + mb;
224 192 }
225 193
226 194 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
227 195 int a1, int a2, int b1, int b2, struct hunk *l)
228 196 {
229 197 int i, j, k;
230 198
231 199 while (1) {
232 200 /* find the longest match in this chunk */
233 201 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
234 202 if (!k)
235 203 return l;
236 204
237 205 /* and recurse on the remaining chunks on either side */
238 206 l = recurse(a, b, pos, a1, i, b1, j, l);
239 207 if (!l)
240 208 return NULL;
241 209
242 210 l->next = (struct hunk *)malloc(sizeof(struct hunk));
243 211 if (!l->next)
244 212 return NULL;
245 213
246 214 l = l->next;
247 215 l->a1 = i;
248 216 l->a2 = i + k;
249 217 l->b1 = j;
250 218 l->b2 = j + k;
251 219 l->next = NULL;
252 220
253 221 /* tail-recursion didn't happen, so do equivalent iteration */
254 222 a1 = i + k;
255 223 b1 = j + k;
256 224 }
257 225 }
258 226
259 227 static int diff(struct line *a, int an, struct line *b, int bn,
260 228 struct hunk *base)
261 229 {
262 230 struct hunk *curr;
263 231 struct pos *pos;
264 232 int t, count = 0;
265 233
266 234 /* allocate and fill arrays */
267 235 t = equatelines(a, an, b, bn);
268 236 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
269 237
270 238 if (pos && t) {
271 239 /* generate the matching block list */
272 240
273 241 curr = recurse(a, b, pos, 0, an, 0, bn, base);
274 242 if (!curr)
275 243 return -1;
276 244
277 245 /* sentinel end hunk */
278 246 curr->next = (struct hunk *)malloc(sizeof(struct hunk));
279 247 if (!curr->next)
280 248 return -1;
281 249 curr = curr->next;
282 250 curr->a1 = curr->a2 = an;
283 251 curr->b1 = curr->b2 = bn;
284 252 curr->next = NULL;
285 253 }
286 254
287 255 free(pos);
288 256
289 257 /* normalize the hunk list, try to push each hunk towards the end */
290 258 for (curr = base->next; curr; curr = curr->next) {
291 259 struct hunk *next = curr->next;
292 260 int shift = 0;
293 261
294 262 if (!next)
295 263 break;
296 264
297 265 if (curr->a2 == next->a1)
298 266 while (curr->a2 + shift < an && curr->b2 + shift < bn
299 267 && !cmp(a + curr->a2 + shift,
300 268 b + curr->b2 + shift))
301 269 shift++;
302 270 else if (curr->b2 == next->b1)
303 271 while (curr->b2 + shift < bn && curr->a2 + shift < an
304 272 && !cmp(b + curr->b2 + shift,
305 273 a + curr->a2 + shift))
306 274 shift++;
307 275 if (!shift)
308 276 continue;
309 277 curr->b2 += shift;
310 278 next->b1 += shift;
311 279 curr->a2 += shift;
312 280 next->a1 += shift;
313 281 }
314 282
315 283 for (curr = base->next; curr; curr = curr->next)
316 284 count++;
317 285 return count;
318 286 }
319 287
320 288 static void freehunks(struct hunk *l)
321 289 {
322 290 struct hunk *n;
323 291 for (; l; l = n) {
324 292 n = l->next;
325 293 free(l);
326 294 }
327 295 }
328 296
329 297 static PyObject *blocks(PyObject *self, PyObject *args)
330 298 {
331 299 PyObject *sa, *sb, *rl = NULL, *m;
332 300 struct line *a, *b;
333 301 struct hunk l, *h;
334 302 int an, bn, count, pos = 0;
335 303
336 304 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
337 305 return NULL;
338 306
339 307 an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
340 308 bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
341 309
342 310 if (!a || !b)
343 311 goto nomem;
344 312
345 313 l.next = NULL;
346 314 count = diff(a, an, b, bn, &l);
347 315 if (count < 0)
348 316 goto nomem;
349 317
350 318 rl = PyList_New(count);
351 319 if (!rl)
352 320 goto nomem;
353 321
354 322 for (h = l.next; h; h = h->next) {
355 323 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
356 324 PyList_SetItem(rl, pos, m);
357 325 pos++;
358 326 }
359 327
360 328 nomem:
361 329 free(a);
362 330 free(b);
363 331 freehunks(l.next);
364 332 return rl ? rl : PyErr_NoMemory();
365 333 }
366 334
367 335 static PyObject *bdiff(PyObject *self, PyObject *args)
368 336 {
369 337 char *sa, *sb, *rb;
370 338 PyObject *result = NULL;
371 339 struct line *al, *bl;
372 340 struct hunk l, *h;
373 341 uint32_t encode[3];
374 342 int an, bn, len = 0, la, lb, count;
375 343
376 344 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
377 345 return NULL;
378 346
379 347 an = splitlines(sa, la, &al);
380 348 bn = splitlines(sb, lb, &bl);
381 349 if (!al || !bl)
382 350 goto nomem;
383 351
384 352 l.next = NULL;
385 353 count = diff(al, an, bl, bn, &l);
386 354 if (count < 0)
387 355 goto nomem;
388 356
389 357 /* calculate length of output */
390 358 la = lb = 0;
391 359 for (h = l.next; h; h = h->next) {
392 360 if (h->a1 != la || h->b1 != lb)
393 361 len += 12 + bl[h->b1].l - bl[lb].l;
394 362 la = h->a2;
395 363 lb = h->b2;
396 364 }
397 365
398 366 result = PyBytes_FromStringAndSize(NULL, len);
399 367
400 368 if (!result)
401 369 goto nomem;
402 370
403 371 /* build binary patch */
404 372 rb = PyBytes_AsString(result);
405 373 la = lb = 0;
406 374
407 375 for (h = l.next; h; h = h->next) {
408 376 if (h->a1 != la || h->b1 != lb) {
409 377 len = bl[h->b1].l - bl[lb].l;
410 378 encode[0] = htonl(al[la].l - al->l);
411 379 encode[1] = htonl(al[h->a1].l - al->l);
412 380 encode[2] = htonl(len);
413 381 memcpy(rb, encode, 12);
414 382 memcpy(rb + 12, bl[lb].l, len);
415 383 rb += 12 + len;
416 384 }
417 385 la = h->a2;
418 386 lb = h->b2;
419 387 }
420 388
421 389 nomem:
422 390 free(al);
423 391 free(bl);
424 392 freehunks(l.next);
425 393 return result ? result : PyErr_NoMemory();
426 394 }
427 395
428 396 /*
429 397 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
430 398 * reduce whitespace sequences to a single space and trim remaining whitespace
431 399 * from end of lines.
432 400 */
433 401 static PyObject *fixws(PyObject *self, PyObject *args)
434 402 {
435 403 PyObject *s, *result = NULL;
436 404 char allws, c;
437 405 const char *r;
438 406 int i, rlen, wlen = 0;
439 407 char *w;
440 408
441 409 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
442 410 return NULL;
443 411 r = PyBytes_AsString(s);
444 412 rlen = PyBytes_Size(s);
445 413
446 414 w = (char *)malloc(rlen ? rlen : 1);
447 415 if (!w)
448 416 goto nomem;
449 417
450 418 for (i = 0; i != rlen; i++) {
451 419 c = r[i];
452 420 if (c == ' ' || c == '\t' || c == '\r') {
453 421 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
454 422 w[wlen++] = ' ';
455 423 } else if (c == '\n' && !allws
456 424 && wlen > 0 && w[wlen - 1] == ' ') {
457 425 w[wlen - 1] = '\n';
458 426 } else {
459 427 w[wlen++] = c;
460 428 }
461 429 }
462 430
463 431 result = PyBytes_FromStringAndSize(w, wlen);
464 432
465 433 nomem:
466 434 free(w);
467 435 return result ? result : PyErr_NoMemory();
468 436 }
469 437
470 438
471 439 static char mdiff_doc[] = "Efficient binary diff.";
472 440
473 441 static PyMethodDef methods[] = {
474 442 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
475 443 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
476 444 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
477 445 {NULL, NULL}
478 446 };
479 447
480 448 #ifdef IS_PY3K
481 449 static struct PyModuleDef bdiff_module = {
482 450 PyModuleDef_HEAD_INIT,
483 451 "bdiff",
484 452 mdiff_doc,
485 453 -1,
486 454 methods
487 455 };
488 456
489 457 PyMODINIT_FUNC PyInit_bdiff(void)
490 458 {
491 459 return PyModule_Create(&bdiff_module);
492 460 }
493 461 #else
494 462 PyMODINIT_FUNC initbdiff(void)
495 463 {
496 464 Py_InitModule3("bdiff", methods, mdiff_doc);
497 465 }
498 466 #endif
499 467
@@ -1,460 +1,434 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26
27 27 #include "util.h"
28 28
29 #ifdef _WIN32
30 #ifdef _MSC_VER
31 /* msvc 6.0 has problems */
32 #define inline __inline
33 typedef unsigned long uint32_t;
34 #else
35 #include <stdint.h>
36 #endif
37 static uint32_t ntohl(uint32_t x)
38 {
39 return ((x & 0x000000ffUL) << 24) |
40 ((x & 0x0000ff00UL) << 8) |
41 ((x & 0x00ff0000UL) >> 8) |
42 ((x & 0xff000000UL) >> 24);
43 }
44 #else
45 /* not windows */
46 #include <sys/types.h>
47 #if defined __BEOS__ && !defined __HAIKU__
48 #include <ByteOrder.h>
49 #else
50 #include <arpa/inet.h>
51 #endif
52 #include <inttypes.h>
53 #endif
54
55 29 static char mpatch_doc[] = "Efficient binary patching.";
56 30 static PyObject *mpatch_Error;
57 31
58 32 struct frag {
59 33 int start, end, len;
60 34 const char *data;
61 35 };
62 36
63 37 struct flist {
64 38 struct frag *base, *head, *tail;
65 39 };
66 40
67 41 static struct flist *lalloc(int size)
68 42 {
69 43 struct flist *a = NULL;
70 44
71 45 if (size < 1)
72 46 size = 1;
73 47
74 48 a = (struct flist *)malloc(sizeof(struct flist));
75 49 if (a) {
76 50 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
77 51 if (a->base) {
78 52 a->head = a->tail = a->base;
79 53 return a;
80 54 }
81 55 free(a);
82 56 a = NULL;
83 57 }
84 58 if (!PyErr_Occurred())
85 59 PyErr_NoMemory();
86 60 return NULL;
87 61 }
88 62
89 63 static void lfree(struct flist *a)
90 64 {
91 65 if (a) {
92 66 free(a->base);
93 67 free(a);
94 68 }
95 69 }
96 70
97 71 static int lsize(struct flist *a)
98 72 {
99 73 return a->tail - a->head;
100 74 }
101 75
102 76 /* move hunks in source that are less cut to dest, compensating
103 77 for changes in offset. the last hunk may be split if necessary.
104 78 */
105 79 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
106 80 {
107 81 struct frag *d = dest->tail, *s = src->head;
108 82 int postend, c, l;
109 83
110 84 while (s != src->tail) {
111 85 if (s->start + offset >= cut)
112 86 break; /* we've gone far enough */
113 87
114 88 postend = offset + s->start + s->len;
115 89 if (postend <= cut) {
116 90 /* save this hunk */
117 91 offset += s->start + s->len - s->end;
118 92 *d++ = *s++;
119 93 }
120 94 else {
121 95 /* break up this hunk */
122 96 c = cut - offset;
123 97 if (s->end < c)
124 98 c = s->end;
125 99 l = cut - offset - s->start;
126 100 if (s->len < l)
127 101 l = s->len;
128 102
129 103 offset += s->start + l - c;
130 104
131 105 d->start = s->start;
132 106 d->end = c;
133 107 d->len = l;
134 108 d->data = s->data;
135 109 d++;
136 110 s->start = c;
137 111 s->len = s->len - l;
138 112 s->data = s->data + l;
139 113
140 114 break;
141 115 }
142 116 }
143 117
144 118 dest->tail = d;
145 119 src->head = s;
146 120 return offset;
147 121 }
148 122
149 123 /* like gather, but with no output list */
150 124 static int discard(struct flist *src, int cut, int offset)
151 125 {
152 126 struct frag *s = src->head;
153 127 int postend, c, l;
154 128
155 129 while (s != src->tail) {
156 130 if (s->start + offset >= cut)
157 131 break;
158 132
159 133 postend = offset + s->start + s->len;
160 134 if (postend <= cut) {
161 135 offset += s->start + s->len - s->end;
162 136 s++;
163 137 }
164 138 else {
165 139 c = cut - offset;
166 140 if (s->end < c)
167 141 c = s->end;
168 142 l = cut - offset - s->start;
169 143 if (s->len < l)
170 144 l = s->len;
171 145
172 146 offset += s->start + l - c;
173 147 s->start = c;
174 148 s->len = s->len - l;
175 149 s->data = s->data + l;
176 150
177 151 break;
178 152 }
179 153 }
180 154
181 155 src->head = s;
182 156 return offset;
183 157 }
184 158
185 159 /* combine hunk lists a and b, while adjusting b for offset changes in a/
186 160 this deletes a and b and returns the resultant list. */
187 161 static struct flist *combine(struct flist *a, struct flist *b)
188 162 {
189 163 struct flist *c = NULL;
190 164 struct frag *bh, *ct;
191 165 int offset = 0, post;
192 166
193 167 if (a && b)
194 168 c = lalloc((lsize(a) + lsize(b)) * 2);
195 169
196 170 if (c) {
197 171
198 172 for (bh = b->head; bh != b->tail; bh++) {
199 173 /* save old hunks */
200 174 offset = gather(c, a, bh->start, offset);
201 175
202 176 /* discard replaced hunks */
203 177 post = discard(a, bh->end, offset);
204 178
205 179 /* insert new hunk */
206 180 ct = c->tail;
207 181 ct->start = bh->start - offset;
208 182 ct->end = bh->end - post;
209 183 ct->len = bh->len;
210 184 ct->data = bh->data;
211 185 c->tail++;
212 186 offset = post;
213 187 }
214 188
215 189 /* hold on to tail from a */
216 190 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
217 191 c->tail += lsize(a);
218 192 }
219 193
220 194 lfree(a);
221 195 lfree(b);
222 196 return c;
223 197 }
224 198
225 199 /* decode a binary patch into a hunk list */
226 200 static struct flist *decode(const char *bin, int len)
227 201 {
228 202 struct flist *l;
229 203 struct frag *lt;
230 204 const char *data = bin + 12, *end = bin + len;
231 205 uint32_t decode[3]; /* for dealing with alignment issues */
232 206
233 207 /* assume worst case size, we won't have many of these lists */
234 208 l = lalloc(len / 12);
235 209 if (!l)
236 210 return NULL;
237 211
238 212 lt = l->tail;
239 213
240 214 while (data <= end) {
241 215 memcpy(decode, bin, 12);
242 216 lt->start = ntohl(decode[0]);
243 217 lt->end = ntohl(decode[1]);
244 218 lt->len = ntohl(decode[2]);
245 219 if (lt->start > lt->end)
246 220 break; /* sanity check */
247 221 bin = data + lt->len;
248 222 if (bin < data)
249 223 break; /* big data + big (bogus) len can wrap around */
250 224 lt->data = data;
251 225 data = bin + 12;
252 226 lt++;
253 227 }
254 228
255 229 if (bin != end) {
256 230 if (!PyErr_Occurred())
257 231 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
258 232 lfree(l);
259 233 return NULL;
260 234 }
261 235
262 236 l->tail = lt;
263 237 return l;
264 238 }
265 239
266 240 /* calculate the size of resultant text */
267 241 static int calcsize(int len, struct flist *l)
268 242 {
269 243 int outlen = 0, last = 0;
270 244 struct frag *f = l->head;
271 245
272 246 while (f != l->tail) {
273 247 if (f->start < last || f->end > len) {
274 248 if (!PyErr_Occurred())
275 249 PyErr_SetString(mpatch_Error,
276 250 "invalid patch");
277 251 return -1;
278 252 }
279 253 outlen += f->start - last;
280 254 last = f->end;
281 255 outlen += f->len;
282 256 f++;
283 257 }
284 258
285 259 outlen += len - last;
286 260 return outlen;
287 261 }
288 262
289 263 static int apply(char *buf, const char *orig, int len, struct flist *l)
290 264 {
291 265 struct frag *f = l->head;
292 266 int last = 0;
293 267 char *p = buf;
294 268
295 269 while (f != l->tail) {
296 270 if (f->start < last || f->end > len) {
297 271 if (!PyErr_Occurred())
298 272 PyErr_SetString(mpatch_Error,
299 273 "invalid patch");
300 274 return 0;
301 275 }
302 276 memcpy(p, orig + last, f->start - last);
303 277 p += f->start - last;
304 278 memcpy(p, f->data, f->len);
305 279 last = f->end;
306 280 p += f->len;
307 281 f++;
308 282 }
309 283 memcpy(p, orig + last, len - last);
310 284 return 1;
311 285 }
312 286
313 287 /* recursively generate a patch of all bins between start and end */
314 288 static struct flist *fold(PyObject *bins, int start, int end)
315 289 {
316 290 int len;
317 291 Py_ssize_t blen;
318 292 const char *buffer;
319 293
320 294 if (start + 1 == end) {
321 295 /* trivial case, output a decoded list */
322 296 PyObject *tmp = PyList_GetItem(bins, start);
323 297 if (!tmp)
324 298 return NULL;
325 299 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
326 300 return NULL;
327 301 return decode(buffer, blen);
328 302 }
329 303
330 304 /* divide and conquer, memory management is elsewhere */
331 305 len = (end - start) / 2;
332 306 return combine(fold(bins, start, start + len),
333 307 fold(bins, start + len, end));
334 308 }
335 309
336 310 static PyObject *
337 311 patches(PyObject *self, PyObject *args)
338 312 {
339 313 PyObject *text, *bins, *result;
340 314 struct flist *patch;
341 315 const char *in;
342 316 char *out;
343 317 int len, outlen;
344 318 Py_ssize_t inlen;
345 319
346 320 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
347 321 return NULL;
348 322
349 323 len = PyList_Size(bins);
350 324 if (!len) {
351 325 /* nothing to do */
352 326 Py_INCREF(text);
353 327 return text;
354 328 }
355 329
356 330 if (PyObject_AsCharBuffer(text, &in, &inlen))
357 331 return NULL;
358 332
359 333 patch = fold(bins, 0, len);
360 334 if (!patch)
361 335 return NULL;
362 336
363 337 outlen = calcsize(inlen, patch);
364 338 if (outlen < 0) {
365 339 result = NULL;
366 340 goto cleanup;
367 341 }
368 342 result = PyBytes_FromStringAndSize(NULL, outlen);
369 343 if (!result) {
370 344 result = NULL;
371 345 goto cleanup;
372 346 }
373 347 out = PyBytes_AsString(result);
374 348 if (!apply(out, in, inlen, patch)) {
375 349 Py_DECREF(result);
376 350 result = NULL;
377 351 }
378 352 cleanup:
379 353 lfree(patch);
380 354 return result;
381 355 }
382 356
383 357 /* calculate size of a patched file directly */
384 358 static PyObject *
385 359 patchedsize(PyObject *self, PyObject *args)
386 360 {
387 361 long orig, start, end, len, outlen = 0, last = 0;
388 362 int patchlen;
389 363 char *bin, *binend, *data;
390 364 uint32_t decode[3]; /* for dealing with alignment issues */
391 365
392 366 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
393 367 return NULL;
394 368
395 369 binend = bin + patchlen;
396 370 data = bin + 12;
397 371
398 372 while (data <= binend) {
399 373 memcpy(decode, bin, 12);
400 374 start = ntohl(decode[0]);
401 375 end = ntohl(decode[1]);
402 376 len = ntohl(decode[2]);
403 377 if (start > end)
404 378 break; /* sanity check */
405 379 bin = data + len;
406 380 if (bin < data)
407 381 break; /* big data + big (bogus) len can wrap around */
408 382 data = bin + 12;
409 383 outlen += start - last;
410 384 last = end;
411 385 outlen += len;
412 386 }
413 387
414 388 if (bin != binend) {
415 389 if (!PyErr_Occurred())
416 390 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
417 391 return NULL;
418 392 }
419 393
420 394 outlen += orig - last;
421 395 return Py_BuildValue("l", outlen);
422 396 }
423 397
424 398 static PyMethodDef methods[] = {
425 399 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
426 400 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
427 401 {NULL, NULL}
428 402 };
429 403
430 404 #ifdef IS_PY3K
431 405 static struct PyModuleDef mpatch_module = {
432 406 PyModuleDef_HEAD_INIT,
433 407 "mpatch",
434 408 mpatch_doc,
435 409 -1,
436 410 methods
437 411 };
438 412
439 413 PyMODINIT_FUNC PyInit_mpatch(void)
440 414 {
441 415 PyObject *m;
442 416
443 417 m = PyModule_Create(&mpatch_module);
444 418 if (m == NULL)
445 419 return NULL;
446 420
447 421 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
448 422 Py_INCREF(mpatch_Error);
449 423 PyModule_AddObject(m, "mpatchError", mpatch_Error);
450 424
451 425 return m;
452 426 }
453 427 #else
454 428 PyMODINIT_FUNC
455 429 initmpatch(void)
456 430 {
457 431 Py_InitModule3("mpatch", methods, mpatch_doc);
458 432 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
459 433 }
460 434 #endif
@@ -1,740 +1,713 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <string.h>
13 13
14 14 #include "util.h"
15 15
16 16 static int hexdigit(char c)
17 17 {
18 18 if (c >= '0' && c <= '9')
19 19 return c - '0';
20 20 if (c >= 'a' && c <= 'f')
21 21 return c - 'a' + 10;
22 22 if (c >= 'A' && c <= 'F')
23 23 return c - 'A' + 10;
24 24
25 25 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
26 26 return 0;
27 27 }
28 28
29 29 /*
30 30 * Turn a hex-encoded string into binary.
31 31 */
32 32 static PyObject *unhexlify(const char *str, int len)
33 33 {
34 34 PyObject *ret;
35 35 const char *c;
36 36 char *d;
37 37
38 38 ret = PyBytes_FromStringAndSize(NULL, len / 2);
39 39
40 40 if (!ret)
41 41 return NULL;
42 42
43 43 d = PyBytes_AsString(ret);
44 44
45 45 for (c = str; c < str + len;) {
46 46 int hi = hexdigit(*c++);
47 47 int lo = hexdigit(*c++);
48 48 *d++ = (hi << 4) | lo;
49 49 }
50 50
51 51 return ret;
52 52 }
53 53
54 54 /*
55 55 * This code assumes that a manifest is stitched together with newline
56 56 * ('\n') characters.
57 57 */
58 58 static PyObject *parse_manifest(PyObject *self, PyObject *args)
59 59 {
60 60 PyObject *mfdict, *fdict;
61 61 char *str, *cur, *start, *zero;
62 62 int len;
63 63
64 64 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
65 65 &PyDict_Type, &mfdict,
66 66 &PyDict_Type, &fdict,
67 67 &str, &len))
68 68 goto quit;
69 69
70 70 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
71 71 PyObject *file = NULL, *node = NULL;
72 72 PyObject *flags = NULL;
73 73 int nlen;
74 74
75 75 if (!*cur) {
76 76 zero = cur;
77 77 continue;
78 78 }
79 79 else if (*cur != '\n')
80 80 continue;
81 81
82 82 if (!zero) {
83 83 PyErr_SetString(PyExc_ValueError,
84 84 "manifest entry has no separator");
85 85 goto quit;
86 86 }
87 87
88 88 file = PyBytes_FromStringAndSize(start, zero - start);
89 89
90 90 if (!file)
91 91 goto bail;
92 92
93 93 nlen = cur - zero - 1;
94 94
95 95 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
96 96 if (!node)
97 97 goto bail;
98 98
99 99 if (nlen > 40) {
100 100 flags = PyBytes_FromStringAndSize(zero + 41,
101 101 nlen - 40);
102 102 if (!flags)
103 103 goto bail;
104 104
105 105 if (PyDict_SetItem(fdict, file, flags) == -1)
106 106 goto bail;
107 107 }
108 108
109 109 if (PyDict_SetItem(mfdict, file, node) == -1)
110 110 goto bail;
111 111
112 112 start = cur + 1;
113 113 zero = NULL;
114 114
115 115 Py_XDECREF(flags);
116 116 Py_XDECREF(node);
117 117 Py_XDECREF(file);
118 118 continue;
119 119 bail:
120 120 Py_XDECREF(flags);
121 121 Py_XDECREF(node);
122 122 Py_XDECREF(file);
123 123 goto quit;
124 124 }
125 125
126 126 if (len > 0 && *(cur - 1) != '\n') {
127 127 PyErr_SetString(PyExc_ValueError,
128 128 "manifest contains trailing garbage");
129 129 goto quit;
130 130 }
131 131
132 132 Py_INCREF(Py_None);
133 133 return Py_None;
134 134 quit:
135 135 return NULL;
136 136 }
137 137
138 #ifdef _WIN32
139 #ifdef _MSC_VER
140 /* msvc 6.0 has problems */
141 #define inline __inline
142 typedef unsigned long uint32_t;
143 typedef unsigned __int64 uint64_t;
144 #else
145 #include <stdint.h>
146 #endif
147 static uint32_t ntohl(uint32_t x)
148 {
149 return ((x & 0x000000ffUL) << 24) |
150 ((x & 0x0000ff00UL) << 8) |
151 ((x & 0x00ff0000UL) >> 8) |
152 ((x & 0xff000000UL) >> 24);
153 }
154 #else
155 /* not windows */
156 #include <sys/types.h>
157 #if defined __BEOS__ && !defined __HAIKU__
158 #include <ByteOrder.h>
159 #else
160 #include <arpa/inet.h>
161 #endif
162 #include <inttypes.h>
163 #endif
164
165 138 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
166 139 {
167 140 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
168 141 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
169 142 char *str, *cur, *end, *cpos;
170 143 int state, mode, size, mtime;
171 144 unsigned int flen;
172 145 int len;
173 146 uint32_t decode[4]; /* for alignment */
174 147
175 148 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
176 149 &PyDict_Type, &dmap,
177 150 &PyDict_Type, &cmap,
178 151 &str, &len))
179 152 goto quit;
180 153
181 154 /* read parents */
182 155 if (len < 40)
183 156 goto quit;
184 157
185 158 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
186 159 if (!parents)
187 160 goto quit;
188 161
189 162 /* read filenames */
190 163 cur = str + 40;
191 164 end = str + len;
192 165
193 166 while (cur < end - 17) {
194 167 /* unpack header */
195 168 state = *cur;
196 169 memcpy(decode, cur + 1, 16);
197 170 mode = ntohl(decode[0]);
198 171 size = ntohl(decode[1]);
199 172 mtime = ntohl(decode[2]);
200 173 flen = ntohl(decode[3]);
201 174 cur += 17;
202 175 if (cur + flen > end || cur + flen < cur) {
203 176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
204 177 goto quit;
205 178 }
206 179
207 180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
208 181 if (!entry)
209 182 goto quit;
210 183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
211 184
212 185 cpos = memchr(cur, 0, flen);
213 186 if (cpos) {
214 187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
215 188 cname = PyBytes_FromStringAndSize(cpos + 1,
216 189 flen - (cpos - cur) - 1);
217 190 if (!fname || !cname ||
218 191 PyDict_SetItem(cmap, fname, cname) == -1 ||
219 192 PyDict_SetItem(dmap, fname, entry) == -1)
220 193 goto quit;
221 194 Py_DECREF(cname);
222 195 } else {
223 196 fname = PyBytes_FromStringAndSize(cur, flen);
224 197 if (!fname ||
225 198 PyDict_SetItem(dmap, fname, entry) == -1)
226 199 goto quit;
227 200 }
228 201 cur += flen;
229 202 Py_DECREF(fname);
230 203 Py_DECREF(entry);
231 204 fname = cname = entry = NULL;
232 205 }
233 206
234 207 ret = parents;
235 208 Py_INCREF(ret);
236 209 quit:
237 210 Py_XDECREF(fname);
238 211 Py_XDECREF(cname);
239 212 Py_XDECREF(entry);
240 213 Py_XDECREF(parents);
241 214 return ret;
242 215 }
243 216
244 217 /*
245 218 * A list-like object that decodes the contents of a RevlogNG index
246 219 * file on demand. It has limited support for insert and delete at the
247 220 * last element before the end. The last entry is always a sentinel
248 221 * nullid.
249 222 */
250 223 typedef struct {
251 224 PyObject_HEAD
252 225 /* Type-specific fields go here. */
253 226 PyObject *data; /* raw bytes of index */
254 227 PyObject **cache; /* cached tuples */
255 228 const char **offsets; /* populated on demand */
256 229 Py_ssize_t raw_length; /* original number of elements */
257 230 Py_ssize_t length; /* current number of elements */
258 231 PyObject *added; /* populated on demand */
259 232 int inlined;
260 233 } indexObject;
261 234
262 235 static Py_ssize_t index_length(indexObject *self)
263 236 {
264 237 if (self->added == NULL)
265 238 return self->length;
266 239 return self->length + PyList_GET_SIZE(self->added);
267 240 }
268 241
269 242 static PyObject *nullentry;
270 243
271 244 static long inline_scan(indexObject *self, const char **offsets);
272 245
273 246 #if LONG_MAX == 0x7fffffffL
274 247 static const char *tuple_format = "Kiiiiiis#";
275 248 #else
276 249 static const char *tuple_format = "kiiiiiis#";
277 250 #endif
278 251
279 252 /* RevlogNG format (all in big endian, data may be inlined):
280 253 * 6 bytes: offset
281 254 * 2 bytes: flags
282 255 * 4 bytes: compressed length
283 256 * 4 bytes: uncompressed length
284 257 * 4 bytes: base revision
285 258 * 4 bytes: link revision
286 259 * 4 bytes: parent 1 revision
287 260 * 4 bytes: parent 2 revision
288 261 * 32 bytes: nodeid (only 20 bytes used)
289 262 */
290 263 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
291 264 {
292 265 uint32_t decode[8]; /* to enforce alignment with inline data */
293 266 uint64_t offset_flags;
294 267 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
295 268 const char *c_node_id;
296 269 const char *data;
297 270 Py_ssize_t length = index_length(self);
298 271 PyObject *entry;
299 272
300 273 if (pos >= length) {
301 274 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
302 275 return NULL;
303 276 }
304 277
305 278 if (pos == length - 1) {
306 279 Py_INCREF(nullentry);
307 280 return nullentry;
308 281 }
309 282
310 283 if (pos >= self->length - 1) {
311 284 PyObject *obj;
312 285 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
313 286 Py_INCREF(obj);
314 287 return obj;
315 288 }
316 289
317 290 if (self->cache) {
318 291 if (self->cache[pos]) {
319 292 Py_INCREF(self->cache[pos]);
320 293 return self->cache[pos];
321 294 }
322 295 } else {
323 296 self->cache = calloc(self->raw_length, sizeof(PyObject *));
324 297 if (self->cache == NULL)
325 298 return PyErr_NoMemory();
326 299 }
327 300
328 301 if (self->inlined && pos > 0) {
329 302 if (self->offsets == NULL) {
330 303 self->offsets = malloc(self->raw_length *
331 304 sizeof(*self->offsets));
332 305 if (self->offsets == NULL)
333 306 return PyErr_NoMemory();
334 307 inline_scan(self, self->offsets);
335 308 }
336 309 data = self->offsets[pos];
337 310 } else
338 311 data = PyString_AS_STRING(self->data) + pos * 64;
339 312
340 313 memcpy(decode, data, 8 * sizeof(uint32_t));
341 314
342 315 offset_flags = ntohl(decode[1]);
343 316 if (pos == 0) /* mask out version number for the first entry */
344 317 offset_flags &= 0xFFFF;
345 318 else {
346 319 uint32_t offset_high = ntohl(decode[0]);
347 320 offset_flags |= ((uint64_t)offset_high) << 32;
348 321 }
349 322
350 323 comp_len = ntohl(decode[2]);
351 324 uncomp_len = ntohl(decode[3]);
352 325 base_rev = ntohl(decode[4]);
353 326 link_rev = ntohl(decode[5]);
354 327 parent_1 = ntohl(decode[6]);
355 328 parent_2 = ntohl(decode[7]);
356 329 c_node_id = data + 32;
357 330
358 331 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
359 332 uncomp_len, base_rev, link_rev,
360 333 parent_1, parent_2, c_node_id, 20);
361 334
362 335 if (entry)
363 336 PyObject_GC_UnTrack(entry);
364 337
365 338 self->cache[pos] = entry;
366 339 Py_INCREF(entry);
367 340
368 341 return entry;
369 342 }
370 343
371 344 static PyObject *index_insert(indexObject *self, PyObject *args)
372 345 {
373 346 PyObject *obj, *node;
374 347 long offset;
375 348 Py_ssize_t len;
376 349
377 350 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
378 351 return NULL;
379 352
380 353 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
381 354 PyErr_SetString(PyExc_ValueError, "8-tuple required");
382 355 return NULL;
383 356 }
384 357
385 358 node = PyTuple_GET_ITEM(obj, 7);
386 359 if (!PyString_Check(node) || PyString_GET_SIZE(node) != 20) {
387 360 PyErr_SetString(PyExc_ValueError,
388 361 "20-byte hash required as last element");
389 362 return NULL;
390 363 }
391 364
392 365 len = index_length(self);
393 366
394 367 if (offset < 0)
395 368 offset += len;
396 369
397 370 if (offset != len - 1) {
398 371 PyErr_SetString(PyExc_IndexError,
399 372 "insert only supported at index -1");
400 373 return NULL;
401 374 }
402 375
403 376 if (self->added == NULL) {
404 377 self->added = PyList_New(0);
405 378 if (self->added == NULL)
406 379 return NULL;
407 380 }
408 381
409 382 if (PyList_Append(self->added, obj) == -1)
410 383 return NULL;
411 384
412 385 Py_RETURN_NONE;
413 386 }
414 387
415 388 static void _index_clearcaches(indexObject *self)
416 389 {
417 390 if (self->cache) {
418 391 Py_ssize_t i;
419 392
420 393 for (i = 0; i < self->raw_length; i++) {
421 394 Py_XDECREF(self->cache[i]);
422 395 self->cache[i] = NULL;
423 396 }
424 397 free(self->cache);
425 398 self->cache = NULL;
426 399 }
427 400 if (self->offsets) {
428 401 free(self->offsets);
429 402 self->offsets = NULL;
430 403 }
431 404 }
432 405
433 406 static PyObject *index_clearcaches(indexObject *self)
434 407 {
435 408 _index_clearcaches(self);
436 409 Py_RETURN_NONE;
437 410 }
438 411
439 412 static int index_assign_subscript(indexObject *self, PyObject *item,
440 413 PyObject *value)
441 414 {
442 415 Py_ssize_t start, stop, step, slicelength;
443 416 Py_ssize_t length = index_length(self);
444 417
445 418 if (!PySlice_Check(item) || value != NULL) {
446 419 PyErr_SetString(PyExc_TypeError,
447 420 "revlog index only supports slice deletion");
448 421 return -1;
449 422 }
450 423
451 424 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
452 425 &start, &stop, &step, &slicelength) < 0)
453 426 return -1;
454 427
455 428 if (slicelength <= 0)
456 429 return 0;
457 430
458 431 if ((step < 0 && start < stop) || (step > 0 && start > stop))
459 432 stop = start;
460 433
461 434 if (step < 0) {
462 435 stop = start + 1;
463 436 start = stop + step*(slicelength - 1) - 1;
464 437 step = -step;
465 438 }
466 439
467 440 if (step != 1) {
468 441 PyErr_SetString(PyExc_ValueError,
469 442 "revlog index delete requires step size of 1");
470 443 return -1;
471 444 }
472 445
473 446 if (stop != length - 1) {
474 447 PyErr_SetString(PyExc_IndexError,
475 448 "revlog index deletion indices are invalid");
476 449 return -1;
477 450 }
478 451
479 452 if (start < self->length) {
480 453 self->length = start + 1;
481 454 if (self->added) {
482 455 Py_DECREF(self->added);
483 456 self->added = NULL;
484 457 }
485 458 return 0;
486 459 }
487 460
488 461 return PyList_SetSlice(self->added, start - self->length + 1,
489 462 PyList_GET_SIZE(self->added),
490 463 NULL);
491 464 }
492 465
493 466 static long inline_scan(indexObject *self, const char **offsets)
494 467 {
495 468 const char *data = PyString_AS_STRING(self->data);
496 469 const char *end = data + PyString_GET_SIZE(self->data);
497 470 const long hdrsize = 64;
498 471 long incr = hdrsize;
499 472 Py_ssize_t len = 0;
500 473
501 474 while (data + hdrsize <= end) {
502 475 uint32_t comp_len;
503 476 const char *old_data;
504 477 /* 3rd element of header is length of compressed inline data */
505 478 memcpy(&comp_len, data + 8, sizeof(uint32_t));
506 479 incr = hdrsize + ntohl(comp_len);
507 480 if (incr < hdrsize)
508 481 break;
509 482 if (offsets)
510 483 offsets[len] = data;
511 484 len++;
512 485 old_data = data;
513 486 data += incr;
514 487 if (data <= old_data)
515 488 break;
516 489 }
517 490
518 491 if (data != end && data + hdrsize != end) {
519 492 if (!PyErr_Occurred())
520 493 PyErr_SetString(PyExc_ValueError, "corrupt index file");
521 494 return -1;
522 495 }
523 496
524 497 return len;
525 498 }
526 499
527 500 static int index_real_init(indexObject *self, const char *data, int size,
528 501 PyObject *inlined_obj, PyObject *data_obj)
529 502 {
530 503 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
531 504 self->data = data_obj;
532 505 self->cache = NULL;
533 506
534 507 self->added = NULL;
535 508 self->offsets = NULL;
536 509 Py_INCREF(self->data);
537 510
538 511 if (self->inlined) {
539 512 long len = inline_scan(self, NULL);
540 513 if (len == -1)
541 514 goto bail;
542 515 self->raw_length = len;
543 516 self->length = len + 1;
544 517 } else {
545 518 if (size % 64) {
546 519 PyErr_SetString(PyExc_ValueError, "corrupt index file");
547 520 goto bail;
548 521 }
549 522 self->raw_length = size / 64;
550 523 self->length = self->raw_length + 1;
551 524 }
552 525
553 526 return 0;
554 527 bail:
555 528 return -1;
556 529 }
557 530
558 531 static int index_init(indexObject *self, PyObject *args, PyObject *kwds)
559 532 {
560 533 const char *data;
561 534 int size;
562 535 PyObject *inlined_obj;
563 536
564 537 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
565 538 return -1;
566 539
567 540 return index_real_init(self, data, size, inlined_obj,
568 541 PyTuple_GET_ITEM(args, 0));
569 542 }
570 543
571 544 static void index_dealloc(indexObject *self)
572 545 {
573 546 _index_clearcaches(self);
574 547 Py_DECREF(self->data);
575 548 Py_XDECREF(self->added);
576 549 PyObject_Del(self);
577 550 }
578 551
579 552 static PySequenceMethods index_sequence_methods = {
580 553 (lenfunc)index_length, /* sq_length */
581 554 0, /* sq_concat */
582 555 0, /* sq_repeat */
583 556 (ssizeargfunc)index_get, /* sq_item */
584 557 };
585 558
586 559 static PyMappingMethods index_mapping_methods = {
587 560 (lenfunc)index_length, /* mp_length */
588 561 NULL, /* mp_subscript */
589 562 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
590 563 };
591 564
592 565 static PyMethodDef index_methods[] = {
593 566 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
594 567 "clear the index caches"},
595 568 {"insert", (PyCFunction)index_insert, METH_VARARGS,
596 569 "insert an index entry"},
597 570 {NULL} /* Sentinel */
598 571 };
599 572
600 573 static PyTypeObject indexType = {
601 574 PyObject_HEAD_INIT(NULL)
602 575 0, /* ob_size */
603 576 "parsers.index", /* tp_name */
604 577 sizeof(indexObject), /* tp_basicsize */
605 578 0, /* tp_itemsize */
606 579 (destructor)index_dealloc, /* tp_dealloc */
607 580 0, /* tp_print */
608 581 0, /* tp_getattr */
609 582 0, /* tp_setattr */
610 583 0, /* tp_compare */
611 584 0, /* tp_repr */
612 585 0, /* tp_as_number */
613 586 &index_sequence_methods, /* tp_as_sequence */
614 587 &index_mapping_methods, /* tp_as_mapping */
615 588 0, /* tp_hash */
616 589 0, /* tp_call */
617 590 0, /* tp_str */
618 591 0, /* tp_getattro */
619 592 0, /* tp_setattro */
620 593 0, /* tp_as_buffer */
621 594 Py_TPFLAGS_DEFAULT, /* tp_flags */
622 595 "revlog index", /* tp_doc */
623 596 0, /* tp_traverse */
624 597 0, /* tp_clear */
625 598 0, /* tp_richcompare */
626 599 0, /* tp_weaklistoffset */
627 600 0, /* tp_iter */
628 601 0, /* tp_iternext */
629 602 index_methods, /* tp_methods */
630 603 0, /* tp_members */
631 604 0, /* tp_getset */
632 605 0, /* tp_base */
633 606 0, /* tp_dict */
634 607 0, /* tp_descr_get */
635 608 0, /* tp_descr_set */
636 609 0, /* tp_dictoffset */
637 610 (initproc)index_init, /* tp_init */
638 611 0, /* tp_alloc */
639 612 PyType_GenericNew, /* tp_new */
640 613 };
641 614
642 615 /*
643 616 * returns a tuple of the form (index, None, cache) with elements as
644 617 * follows:
645 618 *
646 619 * index: an index object that lazily parses the RevlogNG records
647 620 * cache: if data is inlined, a tuple (index_file_content, 0), else None
648 621 *
649 622 * added complications are for backwards compatibility
650 623 */
651 624 static PyObject *parse_index2(PyObject *self, PyObject *args)
652 625 {
653 626 const char *data;
654 627 int size, ret;
655 628 PyObject *inlined_obj, *tuple = NULL, *cache = NULL;
656 629 indexObject *idx;
657 630
658 631 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
659 632 return NULL;
660 633
661 634 idx = PyObject_New(indexObject, &indexType);
662 635
663 636 if (idx == NULL)
664 637 goto bail;
665 638
666 639 ret = index_real_init(idx, data, size, inlined_obj,
667 640 PyTuple_GET_ITEM(args, 0));
668 641 if (ret)
669 642 goto bail;
670 643
671 644 if (idx->inlined) {
672 645 Py_INCREF(idx->data);
673 646 cache = Py_BuildValue("iO", 0, idx->data);
674 647 if (cache == NULL)
675 648 goto bail;
676 649 } else {
677 650 cache = Py_None;
678 651 Py_INCREF(cache);
679 652 }
680 653
681 654 tuple = Py_BuildValue("NN", idx, cache);
682 655 if (!tuple)
683 656 goto bail;
684 657 return tuple;
685 658
686 659 bail:
687 660 Py_XDECREF(idx);
688 661 Py_XDECREF(cache);
689 662 Py_XDECREF(tuple);
690 663 return NULL;
691 664 }
692 665
693 666 static char parsers_doc[] = "Efficient content parsing.";
694 667
695 668 static PyMethodDef methods[] = {
696 669 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
697 670 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
698 671 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
699 672 {NULL, NULL}
700 673 };
701 674
702 675 static void module_init(PyObject *mod)
703 676 {
704 677 static const char nullid[20];
705 678
706 679 if (PyType_Ready(&indexType) < 0)
707 680 return;
708 681 Py_INCREF(&indexType);
709 682
710 683 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
711 684
712 685 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
713 686 -1, -1, -1, -1, nullid, 20);
714 687 if (nullentry)
715 688 PyObject_GC_UnTrack(nullentry);
716 689 }
717 690
718 691 #ifdef IS_PY3K
719 692 static struct PyModuleDef parsers_module = {
720 693 PyModuleDef_HEAD_INIT,
721 694 "parsers",
722 695 parsers_doc,
723 696 -1,
724 697 methods
725 698 };
726 699
727 700 PyMODINIT_FUNC PyInit_parsers(void)
728 701 {
729 702 PyObject *mod = PyModule_Create(&parsers_module);
730 703 module_init(mod);
731 704 return mod;
732 705 }
733 706 #else
734 707 PyMODINIT_FUNC initparsers(void)
735 708 {
736 709 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
737 710 module_init(mod);
738 711 }
739 712 #endif
740 713
@@ -1,116 +1,151 b''
1 1 /*
2 2 util.h - utility functions for interfacing with the various python APIs.
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_UTIL_H_
9 9 #define _HG_UTIL_H_
10 10
11 11 #if PY_MAJOR_VERSION >= 3
12 12
13 13 #define IS_PY3K
14 14 #define PyInt_FromLong PyLong_FromLong
15 15 #define PyInt_AsLong PyLong_AsLong
16 16
17 17 /*
18 18 Mapping of some of the python < 2.x PyString* functions to py3k's PyUnicode.
19 19
20 20 The commented names below represent those that are present in the PyBytes
21 21 definitions for python < 2.6 (below in this file) that don't have a direct
22 22 implementation.
23 23 */
24 24
25 25 #define PyStringObject PyUnicodeObject
26 26 #define PyString_Type PyUnicode_Type
27 27
28 28 #define PyString_Check PyUnicode_Check
29 29 #define PyString_CheckExact PyUnicode_CheckExact
30 30 #define PyString_CHECK_INTERNED PyUnicode_CHECK_INTERNED
31 31 #define PyString_AS_STRING PyUnicode_AsLatin1String
32 32 #define PyString_GET_SIZE PyUnicode_GET_SIZE
33 33
34 34 #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
35 35 #define PyString_FromString PyUnicode_FromString
36 36 #define PyString_FromFormatV PyUnicode_FromFormatV
37 37 #define PyString_FromFormat PyUnicode_FromFormat
38 38 /* #define PyString_Size PyUnicode_GET_SIZE */
39 39 /* #define PyString_AsString */
40 40 /* #define PyString_Repr */
41 41 #define PyString_Concat PyUnicode_Concat
42 42 #define PyString_ConcatAndDel PyUnicode_AppendAndDel
43 43 #define _PyString_Resize PyUnicode_Resize
44 44 /* #define _PyString_Eq */
45 45 #define PyString_Format PyUnicode_Format
46 46 /* #define _PyString_FormatLong */
47 47 /* #define PyString_DecodeEscape */
48 48 #define _PyString_Join PyUnicode_Join
49 49 #define PyString_Decode PyUnicode_Decode
50 50 #define PyString_Encode PyUnicode_Encode
51 51 #define PyString_AsEncodedObject PyUnicode_AsEncodedObject
52 52 #define PyString_AsEncodedString PyUnicode_AsEncodedString
53 53 #define PyString_AsDecodedObject PyUnicode_AsDecodedObject
54 54 #define PyString_AsDecodedString PyUnicode_AsDecodedUnicode
55 55 /* #define PyString_AsStringAndSize */
56 56 #define _PyString_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping
57 57
58 58 #endif /* PY_MAJOR_VERSION */
59 59
60 60 /* Backports from 2.6 */
61 61 #if PY_VERSION_HEX < 0x02060000
62 62
63 63 #define Py_TYPE(ob) (ob)->ob_type
64 64 #define Py_SIZE(ob) (ob)->ob_size
65 65 #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
66 66
67 67 /* Shamelessly stolen from bytesobject.h */
68 68 #define PyBytesObject PyStringObject
69 69 #define PyBytes_Type PyString_Type
70 70
71 71 #define PyBytes_Check PyString_Check
72 72 #define PyBytes_CheckExact PyString_CheckExact
73 73 #define PyBytes_CHECK_INTERNED PyString_CHECK_INTERNED
74 74 #define PyBytes_AS_STRING PyString_AS_STRING
75 75 #define PyBytes_GET_SIZE PyString_GET_SIZE
76 76 #define Py_TPFLAGS_BYTES_SUBCLASS Py_TPFLAGS_STRING_SUBCLASS
77 77
78 78 #define PyBytes_FromStringAndSize PyString_FromStringAndSize
79 79 #define PyBytes_FromString PyString_FromString
80 80 #define PyBytes_FromFormatV PyString_FromFormatV
81 81 #define PyBytes_FromFormat PyString_FromFormat
82 82 #define PyBytes_Size PyString_Size
83 83 #define PyBytes_AsString PyString_AsString
84 84 #define PyBytes_Repr PyString_Repr
85 85 #define PyBytes_Concat PyString_Concat
86 86 #define PyBytes_ConcatAndDel PyString_ConcatAndDel
87 87 #define _PyBytes_Resize _PyString_Resize
88 88 #define _PyBytes_Eq _PyString_Eq
89 89 #define PyBytes_Format PyString_Format
90 90 #define _PyBytes_FormatLong _PyString_FormatLong
91 91 #define PyBytes_DecodeEscape PyString_DecodeEscape
92 92 #define _PyBytes_Join _PyString_Join
93 93 #define PyBytes_Decode PyString_Decode
94 94 #define PyBytes_Encode PyString_Encode
95 95 #define PyBytes_AsEncodedObject PyString_AsEncodedObject
96 96 #define PyBytes_AsEncodedString PyString_AsEncodedString
97 97 #define PyBytes_AsDecodedObject PyString_AsDecodedObject
98 98 #define PyBytes_AsDecodedString PyString_AsDecodedString
99 99 #define PyBytes_AsStringAndSize PyString_AsStringAndSize
100 100 #define _PyBytes_InsertThousandsGrouping _PyString_InsertThousandsGrouping
101 101
102 102 #endif /* PY_VERSION_HEX */
103 103
104 104 #if (PY_VERSION_HEX < 0x02050000)
105 105 /* Definitions to get compatibility with python 2.4 and earlier which
106 106 does not have Py_ssize_t. See also PEP 353.
107 107 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
108 108 */
109 109 typedef int Py_ssize_t;
110 110 #if !defined(PY_SSIZE_T_MIN)
111 111 #define PY_SSIZE_T_MAX INT_MAX
112 112 #define PY_SSIZE_T_MIN INT_MIN
113 113 #endif
114 114 #endif
115 115
116 #ifdef _WIN32
117 #ifdef _MSC_VER
118 /* msvc 6.0 has problems */
119 #define inline __inline
120 typedef unsigned long uint32_t;
121 typedef unsigned __int64 uint64_t;
122 #else
123 #include <stdint.h>
124 #endif
125 static uint32_t ntohl(uint32_t x)
126 {
127 return ((x & 0x000000ffUL) << 24) |
128 ((x & 0x0000ff00UL) << 8) |
129 ((x & 0x00ff0000UL) >> 8) |
130 ((x & 0xff000000UL) >> 24);
131 }
132 #else
133 /* not windows */
134 #include <sys/types.h>
135 #if defined __BEOS__ && !defined __HAIKU__
136 #include <ByteOrder.h>
137 #else
138 #include <arpa/inet.h>
139 #endif
140 #include <inttypes.h>
141 #endif
142
143 #if defined __hpux || defined __SUNPRO_C || defined _AIX
144 #define inline
145 #endif
146
147 #ifdef __linux
148 #define inline __inline
149 #endif
150
116 151 #endif /* _HG_UTIL_H_ */
General Comments 0
You need to be logged in to leave comments. Login now