util.h: replace ntohl/htonl with get/putbe32
Matt Mackall
r16437:d126a0d1 default
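The change replaces the old pattern of copying big-endian fields into a temporary uint32_t array and converting with ntohl/htonl by byte-oriented accessors that read and write one byte at a time, so they work on arbitrarily aligned pointers and need no scratch buffer. A minimal sketch of such helpers, assuming the usual shift-and-mask formulation (the exact definitions added to util.h may differ in detail):

```c
#include <stdint.h>

/* read a 32-bit big-endian value from an arbitrarily aligned buffer */
static inline uint32_t getbe32(const char *c)
{
	const unsigned char *d = (const unsigned char *)c;

	return ((uint32_t)d[0] << 24) |
	       ((uint32_t)d[1] << 16) |
	       ((uint32_t)d[2] << 8) |
	       (uint32_t)d[3];
}

/* write a 32-bit value as four big-endian bytes */
static inline void putbe32(uint32_t x, char *c)
{
	c[0] = (x >> 24) & 0xff;
	c[1] = (x >> 16) & 0xff;
	c[2] = (x >> 8) & 0xff;
	c[3] = x & 0xff;
}
```

With these, callers decode fields straight out of the record, e.g. mode = getbe32(cur + 1), instead of staging the bytes through a temporary array first.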
@@ -1,467 +1,465
1 1 /*
2 2 bdiff.c - efficient binary diff extension for Mercurial
3 3
4 4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Based roughly on Python difflib
10 10 */
11 11
12 12 #include <Python.h>
13 13 #include <stdlib.h>
14 14 #include <string.h>
15 15 #include <limits.h>
16 16
17 17 #include "util.h"
18 18
19 19 struct line {
20 20 int hash, len, n, e;
21 21 const char *l;
22 22 };
23 23
24 24 struct pos {
25 25 int pos, len;
26 26 };
27 27
28 28 struct hunk;
29 29 struct hunk {
30 30 int a1, a2, b1, b2;
31 31 struct hunk *next;
32 32 };
33 33
34 34 static int splitlines(const char *a, int len, struct line **lr)
35 35 {
36 36 unsigned hash;
37 37 int i;
38 38 const char *p, *b = a;
39 39 const char * const plast = a + len - 1;
40 40 struct line *l;
41 41
42 42 /* count the lines */
43 43 i = 1; /* extra line for sentinel */
44 44 for (p = a; p < a + len; p++)
45 45 if (*p == '\n' || p == plast)
46 46 i++;
47 47
48 48 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
49 49 if (!l)
50 50 return -1;
51 51
52 52 /* build the line array and calculate hashes */
53 53 hash = 0;
54 54 for (p = a; p < a + len; p++) {
55 55 /* Leonid Yuriev's hash */
56 56 hash = (hash * 1664525) + (unsigned char)*p + 1013904223;
57 57
58 58 if (*p == '\n' || p == plast) {
59 59 l->hash = hash;
60 60 hash = 0;
61 61 l->len = p - b + 1;
62 62 l->l = b;
63 63 l->n = INT_MAX;
64 64 l++;
65 65 b = p + 1;
66 66 }
67 67 }
68 68
69 69 /* set up a sentinel */
70 70 l->hash = 0;
71 71 l->len = 0;
72 72 l->l = a + len;
73 73 return i - 1;
74 74 }
75 75
76 76 static inline int cmp(struct line *a, struct line *b)
77 77 {
78 78 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
79 79 }
80 80
81 81 static int equatelines(struct line *a, int an, struct line *b, int bn)
82 82 {
83 83 int i, j, buckets = 1, t, scale;
84 84 struct pos *h = NULL;
85 85
86 86 /* build a hash table of the next highest power of 2 */
87 87 while (buckets < bn + 1)
88 88 buckets *= 2;
89 89
90 90 /* try to allocate a large hash table to avoid collisions */
91 91 for (scale = 4; scale; scale /= 2) {
92 92 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
93 93 if (h)
94 94 break;
95 95 }
96 96
97 97 if (!h)
98 98 return 0;
99 99
100 100 buckets = buckets * scale - 1;
101 101
102 102 /* clear the hash table */
103 103 for (i = 0; i <= buckets; i++) {
104 104 h[i].pos = INT_MAX;
105 105 h[i].len = 0;
106 106 }
107 107
108 108 /* add lines to the hash table chains */
109 109 for (i = bn - 1; i >= 0; i--) {
110 110 /* find the equivalence class */
111 111 for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
112 112 j = (j + 1) & buckets)
113 113 if (!cmp(b + i, b + h[j].pos))
114 114 break;
115 115
116 116 /* add to the head of the equivalence class */
117 117 b[i].n = h[j].pos;
118 118 b[i].e = j;
119 119 h[j].pos = i;
120 120 h[j].len++; /* keep track of popularity */
121 121 }
122 122
123 123 /* compute popularity threshold */
124 124 t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
125 125
126 126 /* match items in a to their equivalence class in b */
127 127 for (i = 0; i < an; i++) {
128 128 /* find the equivalence class */
129 129 for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
130 130 j = (j + 1) & buckets)
131 131 if (!cmp(a + i, b + h[j].pos))
132 132 break;
133 133
134 134 a[i].e = j; /* use equivalence class for quick compare */
135 135 if (h[j].len <= t)
136 136 a[i].n = h[j].pos; /* point to head of match list */
137 137 else
138 138 a[i].n = INT_MAX; /* too popular */
139 139 }
140 140
141 141 /* discard hash tables */
142 142 free(h);
143 143 return 1;
144 144 }
145 145
146 146 static int longest_match(struct line *a, struct line *b, struct pos *pos,
147 147 int a1, int a2, int b1, int b2, int *omi, int *omj)
148 148 {
149 149 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
150 150
151 151 for (i = a1; i < a2; i++) {
152 152 /* skip things before the current block */
153 153 for (j = a[i].n; j < b1; j = b[j].n)
154 154 ;
155 155
156 156 /* loop through all lines in b that match a[i] */
157 157 for (; j < b2; j = b[j].n) {
158 158 /* does this extend an earlier match? */
159 159 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
160 160 k = pos[j - 1].len + 1;
161 161 else
162 162 k = 1;
163 163 pos[j].pos = i;
164 164 pos[j].len = k;
165 165
166 166 /* best match so far? */
167 167 if (k > mk) {
168 168 mi = i;
169 169 mj = j;
170 170 mk = k;
171 171 }
172 172 }
173 173 }
174 174
175 175 if (mk) {
176 176 mi = mi - mk + 1;
177 177 mj = mj - mk + 1;
178 178 }
179 179
180 180 /* expand match to include neighboring popular lines */
181 181 while (mi - mb > a1 && mj - mb > b1 &&
182 182 a[mi - mb - 1].e == b[mj - mb - 1].e)
183 183 mb++;
184 184 while (mi + mk < a2 && mj + mk < b2 &&
185 185 a[mi + mk].e == b[mj + mk].e)
186 186 mk++;
187 187
188 188 *omi = mi - mb;
189 189 *omj = mj - mb;
190 190
191 191 return mk + mb;
192 192 }
193 193
194 194 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
195 195 int a1, int a2, int b1, int b2, struct hunk *l)
196 196 {
197 197 int i, j, k;
198 198
199 199 while (1) {
200 200 /* find the longest match in this chunk */
201 201 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
202 202 if (!k)
203 203 return l;
204 204
205 205 /* and recurse on the remaining chunks on either side */
206 206 l = recurse(a, b, pos, a1, i, b1, j, l);
207 207 if (!l)
208 208 return NULL;
209 209
210 210 l->next = (struct hunk *)malloc(sizeof(struct hunk));
211 211 if (!l->next)
212 212 return NULL;
213 213
214 214 l = l->next;
215 215 l->a1 = i;
216 216 l->a2 = i + k;
217 217 l->b1 = j;
218 218 l->b2 = j + k;
219 219 l->next = NULL;
220 220
221 221 /* tail-recursion didn't happen, so do equivalent iteration */
222 222 a1 = i + k;
223 223 b1 = j + k;
224 224 }
225 225 }
226 226
227 227 static int diff(struct line *a, int an, struct line *b, int bn,
228 228 struct hunk *base)
229 229 {
230 230 struct hunk *curr;
231 231 struct pos *pos;
232 232 int t, count = 0;
233 233
234 234 /* allocate and fill arrays */
235 235 t = equatelines(a, an, b, bn);
236 236 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
237 237
238 238 if (pos && t) {
239 239 /* generate the matching block list */
240 240
241 241 curr = recurse(a, b, pos, 0, an, 0, bn, base);
242 242 if (!curr)
243 243 return -1;
244 244
245 245 /* sentinel end hunk */
246 246 curr->next = (struct hunk *)malloc(sizeof(struct hunk));
247 247 if (!curr->next)
248 248 return -1;
249 249 curr = curr->next;
250 250 curr->a1 = curr->a2 = an;
251 251 curr->b1 = curr->b2 = bn;
252 252 curr->next = NULL;
253 253 }
254 254
255 255 free(pos);
256 256
257 257 /* normalize the hunk list, try to push each hunk towards the end */
258 258 for (curr = base->next; curr; curr = curr->next) {
259 259 struct hunk *next = curr->next;
260 260 int shift = 0;
261 261
262 262 if (!next)
263 263 break;
264 264
265 265 if (curr->a2 == next->a1)
266 266 while (curr->a2 + shift < an && curr->b2 + shift < bn
267 267 && !cmp(a + curr->a2 + shift,
268 268 b + curr->b2 + shift))
269 269 shift++;
270 270 else if (curr->b2 == next->b1)
271 271 while (curr->b2 + shift < bn && curr->a2 + shift < an
272 272 && !cmp(b + curr->b2 + shift,
273 273 a + curr->a2 + shift))
274 274 shift++;
275 275 if (!shift)
276 276 continue;
277 277 curr->b2 += shift;
278 278 next->b1 += shift;
279 279 curr->a2 += shift;
280 280 next->a1 += shift;
281 281 }
282 282
283 283 for (curr = base->next; curr; curr = curr->next)
284 284 count++;
285 285 return count;
286 286 }
287 287
288 288 static void freehunks(struct hunk *l)
289 289 {
290 290 struct hunk *n;
291 291 for (; l; l = n) {
292 292 n = l->next;
293 293 free(l);
294 294 }
295 295 }
296 296
297 297 static PyObject *blocks(PyObject *self, PyObject *args)
298 298 {
299 299 PyObject *sa, *sb, *rl = NULL, *m;
300 300 struct line *a, *b;
301 301 struct hunk l, *h;
302 302 int an, bn, count, pos = 0;
303 303
304 304 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
305 305 return NULL;
306 306
307 307 an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
308 308 bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
309 309
310 310 if (!a || !b)
311 311 goto nomem;
312 312
313 313 l.next = NULL;
314 314 count = diff(a, an, b, bn, &l);
315 315 if (count < 0)
316 316 goto nomem;
317 317
318 318 rl = PyList_New(count);
319 319 if (!rl)
320 320 goto nomem;
321 321
322 322 for (h = l.next; h; h = h->next) {
323 323 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
324 324 PyList_SetItem(rl, pos, m);
325 325 pos++;
326 326 }
327 327
328 328 nomem:
329 329 free(a);
330 330 free(b);
331 331 freehunks(l.next);
332 332 return rl ? rl : PyErr_NoMemory();
333 333 }
334 334
335 335 static PyObject *bdiff(PyObject *self, PyObject *args)
336 336 {
337 337 char *sa, *sb, *rb;
338 338 PyObject *result = NULL;
339 339 struct line *al, *bl;
340 340 struct hunk l, *h;
341 uint32_t encode[3];
342 341 int an, bn, len = 0, la, lb, count;
343 342
344 343 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
345 344 return NULL;
346 345
347 346 an = splitlines(sa, la, &al);
348 347 bn = splitlines(sb, lb, &bl);
349 348 if (!al || !bl)
350 349 goto nomem;
351 350
352 351 l.next = NULL;
353 352 count = diff(al, an, bl, bn, &l);
354 353 if (count < 0)
355 354 goto nomem;
356 355
357 356 /* calculate length of output */
358 357 la = lb = 0;
359 358 for (h = l.next; h; h = h->next) {
360 359 if (h->a1 != la || h->b1 != lb)
361 360 len += 12 + bl[h->b1].l - bl[lb].l;
362 361 la = h->a2;
363 362 lb = h->b2;
364 363 }
365 364
366 365 result = PyBytes_FromStringAndSize(NULL, len);
367 366
368 367 if (!result)
369 368 goto nomem;
370 369
371 370 /* build binary patch */
372 371 rb = PyBytes_AsString(result);
373 372 la = lb = 0;
374 373
375 374 for (h = l.next; h; h = h->next) {
376 375 if (h->a1 != la || h->b1 != lb) {
377 376 len = bl[h->b1].l - bl[lb].l;
378 encode[0] = htonl(al[la].l - al->l);
379 encode[1] = htonl(al[h->a1].l - al->l);
380 encode[2] = htonl(len);
381 memcpy(rb, encode, 12);
377 putbe32(al[la].l - al->l, rb);
378 putbe32(al[h->a1].l - al->l, rb + 4);
379 putbe32(len, rb + 8);
382 380 memcpy(rb + 12, bl[lb].l, len);
383 381 rb += 12 + len;
384 382 }
385 383 la = h->a2;
386 384 lb = h->b2;
387 385 }
388 386
389 387 nomem:
390 388 free(al);
391 389 free(bl);
392 390 freehunks(l.next);
393 391 return result ? result : PyErr_NoMemory();
394 392 }
395 393
396 394 /*
397 395 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
398 396 * reduce whitespace sequences to a single space and trim remaining whitespace
399 397 * from end of lines.
400 398 */
401 399 static PyObject *fixws(PyObject *self, PyObject *args)
402 400 {
403 401 PyObject *s, *result = NULL;
404 402 char allws, c;
405 403 const char *r;
406 404 int i, rlen, wlen = 0;
407 405 char *w;
408 406
409 407 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
410 408 return NULL;
411 409 r = PyBytes_AsString(s);
412 410 rlen = PyBytes_Size(s);
413 411
414 412 w = (char *)malloc(rlen ? rlen : 1);
415 413 if (!w)
416 414 goto nomem;
417 415
418 416 for (i = 0; i != rlen; i++) {
419 417 c = r[i];
420 418 if (c == ' ' || c == '\t' || c == '\r') {
421 419 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
422 420 w[wlen++] = ' ';
423 421 } else if (c == '\n' && !allws
424 422 && wlen > 0 && w[wlen - 1] == ' ') {
425 423 w[wlen - 1] = '\n';
426 424 } else {
427 425 w[wlen++] = c;
428 426 }
429 427 }
430 428
431 429 result = PyBytes_FromStringAndSize(w, wlen);
432 430
433 431 nomem:
434 432 free(w);
435 433 return result ? result : PyErr_NoMemory();
436 434 }
437 435
438 436
439 437 static char mdiff_doc[] = "Efficient binary diff.";
440 438
441 439 static PyMethodDef methods[] = {
442 440 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
443 441 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
444 442 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
445 443 {NULL, NULL}
446 444 };
447 445
448 446 #ifdef IS_PY3K
449 447 static struct PyModuleDef bdiff_module = {
450 448 PyModuleDef_HEAD_INIT,
451 449 "bdiff",
452 450 mdiff_doc,
453 451 -1,
454 452 methods
455 453 };
456 454
457 455 PyMODINIT_FUNC PyInit_bdiff(void)
458 456 {
459 457 return PyModule_Create(&bdiff_module);
460 458 }
461 459 #else
462 460 PyMODINIT_FUNC initbdiff(void)
463 461 {
464 462 Py_InitModule3("bdiff", methods, mdiff_doc);
465 463 }
466 464 #endif
467 465
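For reference, each hunk the loop in bdiff() above writes is a 12-byte big-endian header followed by the replacement bytes: the start and end offsets of the replaced span in the old text, then the length of the new data. A hedged sketch of serializing one such hunk (write_hunk is an illustrative name, not part of the module; it assumes the putbe32 helper from util.h and a large-enough output buffer):

```c
#include <string.h>
#include <stdint.h>
#include "util.h"	/* putbe32 */

/*
 * Write one delta hunk: replace old[start, end) with len bytes of data.
 * On-disk layout: be32 start | be32 end | be32 len | data.
 * Returns the number of bytes written (12 + len).
 */
static int write_hunk(char *out, uint32_t start, uint32_t end,
		      const char *data, uint32_t len)
{
	putbe32(start, out);
	putbe32(end, out + 4);
	putbe32(len, out + 8);
	memcpy(out + 12, data, len);
	return 12 + (int)len;
}
```

For example, turning "abc\ndef\n" into "abc\nxyz\n" yields a single hunk with start=4, end=8, len=4 and data "xyz\n".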
@@ -1,434 +1,430
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26
27 27 #include "util.h"
28 28
29 29 static char mpatch_doc[] = "Efficient binary patching.";
30 30 static PyObject *mpatch_Error;
31 31
32 32 struct frag {
33 33 int start, end, len;
34 34 const char *data;
35 35 };
36 36
37 37 struct flist {
38 38 struct frag *base, *head, *tail;
39 39 };
40 40
41 41 static struct flist *lalloc(int size)
42 42 {
43 43 struct flist *a = NULL;
44 44
45 45 if (size < 1)
46 46 size = 1;
47 47
48 48 a = (struct flist *)malloc(sizeof(struct flist));
49 49 if (a) {
50 50 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
51 51 if (a->base) {
52 52 a->head = a->tail = a->base;
53 53 return a;
54 54 }
55 55 free(a);
56 56 a = NULL;
57 57 }
58 58 if (!PyErr_Occurred())
59 59 PyErr_NoMemory();
60 60 return NULL;
61 61 }
62 62
63 63 static void lfree(struct flist *a)
64 64 {
65 65 if (a) {
66 66 free(a->base);
67 67 free(a);
68 68 }
69 69 }
70 70
71 71 static int lsize(struct flist *a)
72 72 {
73 73 return a->tail - a->head;
74 74 }
75 75
76 76 /* move hunks in source that are less cut to dest, compensating
77 77 for changes in offset. the last hunk may be split if necessary.
78 78 */
79 79 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
80 80 {
81 81 struct frag *d = dest->tail, *s = src->head;
82 82 int postend, c, l;
83 83
84 84 while (s != src->tail) {
85 85 if (s->start + offset >= cut)
86 86 break; /* we've gone far enough */
87 87
88 88 postend = offset + s->start + s->len;
89 89 if (postend <= cut) {
90 90 /* save this hunk */
91 91 offset += s->start + s->len - s->end;
92 92 *d++ = *s++;
93 93 }
94 94 else {
95 95 /* break up this hunk */
96 96 c = cut - offset;
97 97 if (s->end < c)
98 98 c = s->end;
99 99 l = cut - offset - s->start;
100 100 if (s->len < l)
101 101 l = s->len;
102 102
103 103 offset += s->start + l - c;
104 104
105 105 d->start = s->start;
106 106 d->end = c;
107 107 d->len = l;
108 108 d->data = s->data;
109 109 d++;
110 110 s->start = c;
111 111 s->len = s->len - l;
112 112 s->data = s->data + l;
113 113
114 114 break;
115 115 }
116 116 }
117 117
118 118 dest->tail = d;
119 119 src->head = s;
120 120 return offset;
121 121 }
122 122
123 123 /* like gather, but with no output list */
124 124 static int discard(struct flist *src, int cut, int offset)
125 125 {
126 126 struct frag *s = src->head;
127 127 int postend, c, l;
128 128
129 129 while (s != src->tail) {
130 130 if (s->start + offset >= cut)
131 131 break;
132 132
133 133 postend = offset + s->start + s->len;
134 134 if (postend <= cut) {
135 135 offset += s->start + s->len - s->end;
136 136 s++;
137 137 }
138 138 else {
139 139 c = cut - offset;
140 140 if (s->end < c)
141 141 c = s->end;
142 142 l = cut - offset - s->start;
143 143 if (s->len < l)
144 144 l = s->len;
145 145
146 146 offset += s->start + l - c;
147 147 s->start = c;
148 148 s->len = s->len - l;
149 149 s->data = s->data + l;
150 150
151 151 break;
152 152 }
153 153 }
154 154
155 155 src->head = s;
156 156 return offset;
157 157 }
158 158
159 159 /* combine hunk lists a and b, while adjusting b for offset changes in a;
160 160 this deletes a and b and returns the resultant list. */
161 161 static struct flist *combine(struct flist *a, struct flist *b)
162 162 {
163 163 struct flist *c = NULL;
164 164 struct frag *bh, *ct;
165 165 int offset = 0, post;
166 166
167 167 if (a && b)
168 168 c = lalloc((lsize(a) + lsize(b)) * 2);
169 169
170 170 if (c) {
171 171
172 172 for (bh = b->head; bh != b->tail; bh++) {
173 173 /* save old hunks */
174 174 offset = gather(c, a, bh->start, offset);
175 175
176 176 /* discard replaced hunks */
177 177 post = discard(a, bh->end, offset);
178 178
179 179 /* insert new hunk */
180 180 ct = c->tail;
181 181 ct->start = bh->start - offset;
182 182 ct->end = bh->end - post;
183 183 ct->len = bh->len;
184 184 ct->data = bh->data;
185 185 c->tail++;
186 186 offset = post;
187 187 }
188 188
189 189 /* hold on to tail from a */
190 190 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
191 191 c->tail += lsize(a);
192 192 }
193 193
194 194 lfree(a);
195 195 lfree(b);
196 196 return c;
197 197 }
198 198
199 199 /* decode a binary patch into a hunk list */
200 200 static struct flist *decode(const char *bin, int len)
201 201 {
202 202 struct flist *l;
203 203 struct frag *lt;
204 204 const char *data = bin + 12, *end = bin + len;
205 uint32_t decode[3]; /* for dealing with alignment issues */
206 205
207 206 /* assume worst case size, we won't have many of these lists */
208 207 l = lalloc(len / 12);
209 208 if (!l)
210 209 return NULL;
211 210
212 211 lt = l->tail;
213 212
214 213 while (data <= end) {
215 memcpy(decode, bin, 12);
216 lt->start = ntohl(decode[0]);
217 lt->end = ntohl(decode[1]);
218 lt->len = ntohl(decode[2]);
214 lt->start = getbe32(bin);
215 lt->end = getbe32(bin + 4);
216 lt->len = getbe32(bin + 8);
219 217 if (lt->start > lt->end)
220 218 break; /* sanity check */
221 219 bin = data + lt->len;
222 220 if (bin < data)
223 221 break; /* big data + big (bogus) len can wrap around */
224 222 lt->data = data;
225 223 data = bin + 12;
226 224 lt++;
227 225 }
228 226
229 227 if (bin != end) {
230 228 if (!PyErr_Occurred())
231 229 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
232 230 lfree(l);
233 231 return NULL;
234 232 }
235 233
236 234 l->tail = lt;
237 235 return l;
238 236 }
239 237
240 238 /* calculate the size of resultant text */
241 239 static int calcsize(int len, struct flist *l)
242 240 {
243 241 int outlen = 0, last = 0;
244 242 struct frag *f = l->head;
245 243
246 244 while (f != l->tail) {
247 245 if (f->start < last || f->end > len) {
248 246 if (!PyErr_Occurred())
249 247 PyErr_SetString(mpatch_Error,
250 248 "invalid patch");
251 249 return -1;
252 250 }
253 251 outlen += f->start - last;
254 252 last = f->end;
255 253 outlen += f->len;
256 254 f++;
257 255 }
258 256
259 257 outlen += len - last;
260 258 return outlen;
261 259 }
262 260
263 261 static int apply(char *buf, const char *orig, int len, struct flist *l)
264 262 {
265 263 struct frag *f = l->head;
266 264 int last = 0;
267 265 char *p = buf;
268 266
269 267 while (f != l->tail) {
270 268 if (f->start < last || f->end > len) {
271 269 if (!PyErr_Occurred())
272 270 PyErr_SetString(mpatch_Error,
273 271 "invalid patch");
274 272 return 0;
275 273 }
276 274 memcpy(p, orig + last, f->start - last);
277 275 p += f->start - last;
278 276 memcpy(p, f->data, f->len);
279 277 last = f->end;
280 278 p += f->len;
281 279 f++;
282 280 }
283 281 memcpy(p, orig + last, len - last);
284 282 return 1;
285 283 }
286 284
287 285 /* recursively generate a patch of all bins between start and end */
288 286 static struct flist *fold(PyObject *bins, int start, int end)
289 287 {
290 288 int len;
291 289 Py_ssize_t blen;
292 290 const char *buffer;
293 291
294 292 if (start + 1 == end) {
295 293 /* trivial case, output a decoded list */
296 294 PyObject *tmp = PyList_GetItem(bins, start);
297 295 if (!tmp)
298 296 return NULL;
299 297 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
300 298 return NULL;
301 299 return decode(buffer, blen);
302 300 }
303 301
304 302 /* divide and conquer, memory management is elsewhere */
305 303 len = (end - start) / 2;
306 304 return combine(fold(bins, start, start + len),
307 305 fold(bins, start + len, end));
308 306 }
309 307
310 308 static PyObject *
311 309 patches(PyObject *self, PyObject *args)
312 310 {
313 311 PyObject *text, *bins, *result;
314 312 struct flist *patch;
315 313 const char *in;
316 314 char *out;
317 315 int len, outlen;
318 316 Py_ssize_t inlen;
319 317
320 318 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
321 319 return NULL;
322 320
323 321 len = PyList_Size(bins);
324 322 if (!len) {
325 323 /* nothing to do */
326 324 Py_INCREF(text);
327 325 return text;
328 326 }
329 327
330 328 if (PyObject_AsCharBuffer(text, &in, &inlen))
331 329 return NULL;
332 330
333 331 patch = fold(bins, 0, len);
334 332 if (!patch)
335 333 return NULL;
336 334
337 335 outlen = calcsize(inlen, patch);
338 336 if (outlen < 0) {
339 337 result = NULL;
340 338 goto cleanup;
341 339 }
342 340 result = PyBytes_FromStringAndSize(NULL, outlen);
343 341 if (!result) {
344 342 result = NULL;
345 343 goto cleanup;
346 344 }
347 345 out = PyBytes_AsString(result);
348 346 if (!apply(out, in, inlen, patch)) {
349 347 Py_DECREF(result);
350 348 result = NULL;
351 349 }
352 350 cleanup:
353 351 lfree(patch);
354 352 return result;
355 353 }
356 354
357 355 /* calculate size of a patched file directly */
358 356 static PyObject *
359 357 patchedsize(PyObject *self, PyObject *args)
360 358 {
361 359 long orig, start, end, len, outlen = 0, last = 0;
362 360 int patchlen;
363 361 char *bin, *binend, *data;
364 uint32_t decode[3]; /* for dealing with alignment issues */
365 362
366 363 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
367 364 return NULL;
368 365
369 366 binend = bin + patchlen;
370 367 data = bin + 12;
371 368
372 369 while (data <= binend) {
373 memcpy(decode, bin, 12);
374 start = ntohl(decode[0]);
375 end = ntohl(decode[1]);
376 len = ntohl(decode[2]);
370 start = getbe32(bin);
371 end = getbe32(bin + 4);
372 len = getbe32(bin + 8);
377 373 if (start > end)
378 374 break; /* sanity check */
379 375 bin = data + len;
380 376 if (bin < data)
381 377 break; /* big data + big (bogus) len can wrap around */
382 378 data = bin + 12;
383 379 outlen += start - last;
384 380 last = end;
385 381 outlen += len;
386 382 }
387 383
388 384 if (bin != binend) {
389 385 if (!PyErr_Occurred())
390 386 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
391 387 return NULL;
392 388 }
393 389
394 390 outlen += orig - last;
395 391 return Py_BuildValue("l", outlen);
396 392 }
397 393
398 394 static PyMethodDef methods[] = {
399 395 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
400 396 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
401 397 {NULL, NULL}
402 398 };
403 399
404 400 #ifdef IS_PY3K
405 401 static struct PyModuleDef mpatch_module = {
406 402 PyModuleDef_HEAD_INIT,
407 403 "mpatch",
408 404 mpatch_doc,
409 405 -1,
410 406 methods
411 407 };
412 408
413 409 PyMODINIT_FUNC PyInit_mpatch(void)
414 410 {
415 411 PyObject *m;
416 412
417 413 m = PyModule_Create(&mpatch_module);
418 414 if (m == NULL)
419 415 return NULL;
420 416
421 417 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
422 418 Py_INCREF(mpatch_Error);
423 419 PyModule_AddObject(m, "mpatchError", mpatch_Error);
424 420
425 421 return m;
426 422 }
427 423 #else
428 424 PyMODINIT_FUNC
429 425 initmpatch(void)
430 426 {
431 427 Py_InitModule3("mpatch", methods, mpatch_doc);
432 428 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
433 429 }
434 430 #endif
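The mpatch.c header above explains that each binary patch is decoded into a hunk list and the lists are folded together before the original text is touched. The per-hunk semantics match the bdiff side: replace orig[start, end) with len new bytes. A simplified, unchecked sketch of decoding and applying a single patch in one pass (apply_simple is an illustrative name; the real decode()/apply() split exists so multiple patches can be combined first, and it validates start, end and len before trusting them):

```c
#include <string.h>
#include <stdint.h>
#include "util.h"	/* getbe32 */

/* apply one binary patch to orig, writing the result to out; returns the
   output length (the caller sizes out with calcsize-style arithmetic) */
static int apply_simple(char *out, const char *orig, int origlen,
			const char *patch, int patchlen)
{
	const char *p = patch, *end = patch + patchlen;
	int last = 0, outlen = 0;

	while (p + 12 <= end) {
		int start = getbe32(p);
		int stop = getbe32(p + 4);
		int len = getbe32(p + 8);

		memcpy(out + outlen, orig + last, start - last); /* keep prefix */
		outlen += start - last;
		memcpy(out + outlen, p + 12, len);               /* insert new data */
		outlen += len;
		last = stop;                                     /* skip replaced span */
		p += 12 + len;
	}
	memcpy(out + outlen, orig + last, origlen - last);       /* keep tail */
	return outlen + (origlen - last);
}
```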
@@ -1,1200 +1,1195
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <string.h>
13 13
14 14 #include "util.h"
15 15
16 16 static int hexdigit(char c)
17 17 {
18 18 if (c >= '0' && c <= '9')
19 19 return c - '0';
20 20 if (c >= 'a' && c <= 'f')
21 21 return c - 'a' + 10;
22 22 if (c >= 'A' && c <= 'F')
23 23 return c - 'A' + 10;
24 24
25 25 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
26 26 return 0;
27 27 }
28 28
29 29 /*
30 30 * Turn a hex-encoded string into binary.
31 31 */
32 32 static PyObject *unhexlify(const char *str, int len)
33 33 {
34 34 PyObject *ret;
35 35 const char *c;
36 36 char *d;
37 37
38 38 ret = PyBytes_FromStringAndSize(NULL, len / 2);
39 39
40 40 if (!ret)
41 41 return NULL;
42 42
43 43 d = PyBytes_AsString(ret);
44 44
45 45 for (c = str; c < str + len;) {
46 46 int hi = hexdigit(*c++);
47 47 int lo = hexdigit(*c++);
48 48 *d++ = (hi << 4) | lo;
49 49 }
50 50
51 51 return ret;
52 52 }
53 53
54 54 /*
55 55 * This code assumes that a manifest is stitched together with newline
56 56 * ('\n') characters.
57 57 */
58 58 static PyObject *parse_manifest(PyObject *self, PyObject *args)
59 59 {
60 60 PyObject *mfdict, *fdict;
61 61 char *str, *cur, *start, *zero;
62 62 int len;
63 63
64 64 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
65 65 &PyDict_Type, &mfdict,
66 66 &PyDict_Type, &fdict,
67 67 &str, &len))
68 68 goto quit;
69 69
70 70 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
71 71 PyObject *file = NULL, *node = NULL;
72 72 PyObject *flags = NULL;
73 73 int nlen;
74 74
75 75 if (!*cur) {
76 76 zero = cur;
77 77 continue;
78 78 }
79 79 else if (*cur != '\n')
80 80 continue;
81 81
82 82 if (!zero) {
83 83 PyErr_SetString(PyExc_ValueError,
84 84 "manifest entry has no separator");
85 85 goto quit;
86 86 }
87 87
88 88 file = PyBytes_FromStringAndSize(start, zero - start);
89 89
90 90 if (!file)
91 91 goto bail;
92 92
93 93 nlen = cur - zero - 1;
94 94
95 95 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
96 96 if (!node)
97 97 goto bail;
98 98
99 99 if (nlen > 40) {
100 100 flags = PyBytes_FromStringAndSize(zero + 41,
101 101 nlen - 40);
102 102 if (!flags)
103 103 goto bail;
104 104
105 105 if (PyDict_SetItem(fdict, file, flags) == -1)
106 106 goto bail;
107 107 }
108 108
109 109 if (PyDict_SetItem(mfdict, file, node) == -1)
110 110 goto bail;
111 111
112 112 start = cur + 1;
113 113 zero = NULL;
114 114
115 115 Py_XDECREF(flags);
116 116 Py_XDECREF(node);
117 117 Py_XDECREF(file);
118 118 continue;
119 119 bail:
120 120 Py_XDECREF(flags);
121 121 Py_XDECREF(node);
122 122 Py_XDECREF(file);
123 123 goto quit;
124 124 }
125 125
126 126 if (len > 0 && *(cur - 1) != '\n') {
127 127 PyErr_SetString(PyExc_ValueError,
128 128 "manifest contains trailing garbage");
129 129 goto quit;
130 130 }
131 131
132 132 Py_INCREF(Py_None);
133 133 return Py_None;
134 134 quit:
135 135 return NULL;
136 136 }
137 137
138 138 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
139 139 {
140 140 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
141 141 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
142 142 char *str, *cur, *end, *cpos;
143 143 int state, mode, size, mtime;
144 144 unsigned int flen;
145 145 int len;
146 uint32_t decode[4]; /* for alignment */
147 146
148 147 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
149 148 &PyDict_Type, &dmap,
150 149 &PyDict_Type, &cmap,
151 150 &str, &len))
152 151 goto quit;
153 152
154 153 /* read parents */
155 154 if (len < 40)
156 155 goto quit;
157 156
158 157 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
159 158 if (!parents)
160 159 goto quit;
161 160
162 161 /* read filenames */
163 162 cur = str + 40;
164 163 end = str + len;
165 164
166 165 while (cur < end - 17) {
167 166 /* unpack header */
168 167 state = *cur;
169 memcpy(decode, cur + 1, 16);
170 mode = ntohl(decode[0]);
171 size = ntohl(decode[1]);
172 mtime = ntohl(decode[2]);
173 flen = ntohl(decode[3]);
168 mode = getbe32(cur + 1);
169 size = getbe32(cur + 5);
170 mtime = getbe32(cur + 9);
171 flen = getbe32(cur + 13);
174 172 cur += 17;
175 173 if (cur + flen > end || cur + flen < cur) {
176 174 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 175 goto quit;
178 176 }
179 177
180 178 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 179 if (!entry)
182 180 goto quit;
183 181 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184 182
185 183 cpos = memchr(cur, 0, flen);
186 184 if (cpos) {
187 185 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 186 cname = PyBytes_FromStringAndSize(cpos + 1,
189 187 flen - (cpos - cur) - 1);
190 188 if (!fname || !cname ||
191 189 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 190 PyDict_SetItem(dmap, fname, entry) == -1)
193 191 goto quit;
194 192 Py_DECREF(cname);
195 193 } else {
196 194 fname = PyBytes_FromStringAndSize(cur, flen);
197 195 if (!fname ||
198 196 PyDict_SetItem(dmap, fname, entry) == -1)
199 197 goto quit;
200 198 }
201 199 cur += flen;
202 200 Py_DECREF(fname);
203 201 Py_DECREF(entry);
204 202 fname = cname = entry = NULL;
205 203 }
206 204
207 205 ret = parents;
208 206 Py_INCREF(ret);
209 207 quit:
210 208 Py_XDECREF(fname);
211 209 Py_XDECREF(cname);
212 210 Py_XDECREF(entry);
213 211 Py_XDECREF(parents);
214 212 return ret;
215 213 }
216 214
217 215 /*
218 216 * A base-16 trie for fast node->rev mapping.
219 217 *
220 218 * Positive value is index of the next node in the trie
221 219 * Negative value is a leaf: -(rev + 1)
222 220 * Zero is empty
223 221 */
224 222 typedef struct {
225 223 int children[16];
226 224 } nodetree;
227 225
228 226 /*
229 227 * This class has two behaviours.
230 228 *
231 229 * When used in a list-like way (with integer keys), we decode an
232 230 * entry in a RevlogNG index file on demand. Our last entry is a
233 231 * sentinel, always a nullid. We have limited support for
234 232 * integer-keyed insert and delete, only at elements right before the
235 233 * sentinel.
236 234 *
237 235 * With string keys, we lazily perform a reverse mapping from node to
238 236 * rev, using a base-16 trie.
239 237 */
240 238 typedef struct {
241 239 PyObject_HEAD
242 240 /* Type-specific fields go here. */
243 241 PyObject *data; /* raw bytes of index */
244 242 PyObject **cache; /* cached tuples */
245 243 const char **offsets; /* populated on demand */
246 244 Py_ssize_t raw_length; /* original number of elements */
247 245 Py_ssize_t length; /* current number of elements */
248 246 PyObject *added; /* populated on demand */
249 247 nodetree *nt; /* base-16 trie */
250 248 int ntlength; /* # nodes in use */
251 249 int ntcapacity; /* # nodes allocated */
252 250 int ntdepth; /* maximum depth of tree */
253 251 int ntsplits; /* # splits performed */
254 252 int ntrev; /* last rev scanned */
255 253 int ntlookups; /* # lookups */
256 254 int ntmisses; /* # lookups that miss the cache */
257 255 int inlined;
258 256 } indexObject;
259 257
260 258 static Py_ssize_t index_length(const indexObject *self)
261 259 {
262 260 if (self->added == NULL)
263 261 return self->length;
264 262 return self->length + PyList_GET_SIZE(self->added);
265 263 }
266 264
267 265 static PyObject *nullentry;
268 266 static const char nullid[20];
269 267
270 268 static long inline_scan(indexObject *self, const char **offsets);
271 269
272 270 #if LONG_MAX == 0x7fffffffL
273 271 static char *tuple_format = "Kiiiiiis#";
274 272 #else
275 273 static char *tuple_format = "kiiiiiis#";
276 274 #endif
277 275
278 276 /*
279 277 * Return a pointer to the beginning of a RevlogNG record.
280 278 */
281 279 static const char *index_deref(indexObject *self, Py_ssize_t pos)
282 280 {
283 281 if (self->inlined && pos > 0) {
284 282 if (self->offsets == NULL) {
285 283 self->offsets = malloc(self->raw_length *
286 284 sizeof(*self->offsets));
287 285 if (self->offsets == NULL)
288 286 return (const char *)PyErr_NoMemory();
289 287 inline_scan(self, self->offsets);
290 288 }
291 289 return self->offsets[pos];
292 290 }
293 291
294 292 return PyString_AS_STRING(self->data) + pos * 64;
295 293 }
296 294
297 295 /*
298 296 * RevlogNG format (all in big endian, data may be inlined):
299 297 * 6 bytes: offset
300 298 * 2 bytes: flags
301 299 * 4 bytes: compressed length
302 300 * 4 bytes: uncompressed length
303 301 * 4 bytes: base revision
304 302 * 4 bytes: link revision
305 303 * 4 bytes: parent 1 revision
306 304 * 4 bytes: parent 2 revision
307 305 * 32 bytes: nodeid (only 20 bytes used)
308 306 */
309 307 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 308 {
311 uint32_t decode[8]; /* to enforce alignment with inline data */
312 309 uint64_t offset_flags;
313 310 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
314 311 const char *c_node_id;
315 312 const char *data;
316 313 Py_ssize_t length = index_length(self);
317 314 PyObject *entry;
318 315
319 316 if (pos < 0)
320 317 pos += length;
321 318
322 319 if (pos < 0 || pos >= length) {
323 320 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
324 321 return NULL;
325 322 }
326 323
327 324 if (pos == length - 1) {
328 325 Py_INCREF(nullentry);
329 326 return nullentry;
330 327 }
331 328
332 329 if (pos >= self->length - 1) {
333 330 PyObject *obj;
334 331 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
335 332 Py_INCREF(obj);
336 333 return obj;
337 334 }
338 335
339 336 if (self->cache) {
340 337 if (self->cache[pos]) {
341 338 Py_INCREF(self->cache[pos]);
342 339 return self->cache[pos];
343 340 }
344 341 } else {
345 342 self->cache = calloc(self->raw_length, sizeof(PyObject *));
346 343 if (self->cache == NULL)
347 344 return PyErr_NoMemory();
348 345 }
349 346
350 347 data = index_deref(self, pos);
351 348 if (data == NULL)
352 349 return NULL;
353 350
354 memcpy(decode, data, 8 * sizeof(uint32_t));
355
356 offset_flags = ntohl(decode[1]);
351 offset_flags = getbe32(data + 4);
357 352 if (pos == 0) /* mask out version number for the first entry */
358 353 offset_flags &= 0xFFFF;
359 354 else {
360 uint32_t offset_high = ntohl(decode[0]);
355 uint32_t offset_high = getbe32(data);
361 356 offset_flags |= ((uint64_t)offset_high) << 32;
362 357 }
363 358
364 comp_len = ntohl(decode[2]);
365 uncomp_len = ntohl(decode[3]);
366 base_rev = ntohl(decode[4]);
367 link_rev = ntohl(decode[5]);
368 parent_1 = ntohl(decode[6]);
369 parent_2 = ntohl(decode[7]);
359 comp_len = getbe32(data + 8);
360 uncomp_len = getbe32(data + 12);
361 base_rev = getbe32(data + 16);
362 link_rev = getbe32(data + 20);
363 parent_1 = getbe32(data + 24);
364 parent_2 = getbe32(data + 28);
370 365 c_node_id = data + 32;
371 366
372 367 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
373 368 uncomp_len, base_rev, link_rev,
374 369 parent_1, parent_2, c_node_id, 20);
375 370
376 371 if (entry)
377 372 PyObject_GC_UnTrack(entry);
378 373
379 374 self->cache[pos] = entry;
380 375 Py_INCREF(entry);
381 376
382 377 return entry;
383 378 }
384 379
385 380 /*
386 381 * Return the 20-byte SHA of the node corresponding to the given rev.
387 382 */
388 383 static const char *index_node(indexObject *self, Py_ssize_t pos)
389 384 {
390 385 Py_ssize_t length = index_length(self);
391 386 const char *data;
392 387
393 388 if (pos == length - 1)
394 389 return nullid;
395 390
396 391 if (pos >= length)
397 392 return NULL;
398 393
399 394 if (pos >= self->length - 1) {
400 395 PyObject *tuple, *str;
401 396 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
402 397 str = PyTuple_GetItem(tuple, 7);
403 398 return str ? PyString_AS_STRING(str) : NULL;
404 399 }
405 400
406 401 data = index_deref(self, pos);
407 402 return data ? data + 32 : NULL;
408 403 }
409 404
410 405 static int nt_insert(indexObject *self, const char *node, int rev);
411 406
412 407 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
413 408 {
414 409 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
415 410 return -1;
416 411 if (*nodelen == 20)
417 412 return 0;
418 413 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
419 414 return -1;
420 415 }
421 416
422 417 static PyObject *index_insert(indexObject *self, PyObject *args)
423 418 {
424 419 PyObject *obj;
425 420 char *node;
426 421 long offset;
427 422 Py_ssize_t len, nodelen;
428 423
429 424 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
430 425 return NULL;
431 426
432 427 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
433 428 PyErr_SetString(PyExc_TypeError, "8-tuple required");
434 429 return NULL;
435 430 }
436 431
437 432 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
438 433 return NULL;
439 434
440 435 len = index_length(self);
441 436
442 437 if (offset < 0)
443 438 offset += len;
444 439
445 440 if (offset != len - 1) {
446 441 PyErr_SetString(PyExc_IndexError,
447 442 "insert only supported at index -1");
448 443 return NULL;
449 444 }
450 445
451 446 if (offset > INT_MAX) {
452 447 PyErr_SetString(PyExc_ValueError,
453 448 "currently only 2**31 revs supported");
454 449 return NULL;
455 450 }
456 451
457 452 if (self->added == NULL) {
458 453 self->added = PyList_New(0);
459 454 if (self->added == NULL)
460 455 return NULL;
461 456 }
462 457
463 458 if (PyList_Append(self->added, obj) == -1)
464 459 return NULL;
465 460
466 461 if (self->nt)
467 462 nt_insert(self, node, (int)offset);
468 463
469 464 Py_RETURN_NONE;
470 465 }
471 466
472 467 static void _index_clearcaches(indexObject *self)
473 468 {
474 469 if (self->cache) {
475 470 Py_ssize_t i;
476 471
477 472 for (i = 0; i < self->raw_length; i++) {
478 473 Py_XDECREF(self->cache[i]);
479 474 self->cache[i] = NULL;
480 475 }
481 476 free(self->cache);
482 477 self->cache = NULL;
483 478 }
484 479 if (self->offsets) {
485 480 free(self->offsets);
486 481 self->offsets = NULL;
487 482 }
488 483 if (self->nt) {
489 484 free(self->nt);
490 485 self->nt = NULL;
491 486 }
492 487 }
493 488
494 489 static PyObject *index_clearcaches(indexObject *self)
495 490 {
496 491 _index_clearcaches(self);
497 492 self->ntlength = self->ntcapacity = 0;
498 493 self->ntdepth = self->ntsplits = 0;
499 494 self->ntrev = -1;
500 495 self->ntlookups = self->ntmisses = 0;
501 496 Py_RETURN_NONE;
502 497 }
503 498
504 499 static PyObject *index_stats(indexObject *self)
505 500 {
506 501 PyObject *obj = PyDict_New();
507 502
508 503 if (obj == NULL)
509 504 return NULL;
510 505
511 506 #define istat(__n, __d) \
512 507 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
513 508 goto bail;
514 509
515 510 if (self->added) {
516 511 Py_ssize_t len = PyList_GET_SIZE(self->added);
517 512 if (PyDict_SetItemString(obj, "index entries added",
518 513 PyInt_FromLong(len)) == -1)
519 514 goto bail;
520 515 }
521 516
522 517 if (self->raw_length != self->length - 1)
523 518 istat(raw_length, "revs on disk");
524 519 istat(length, "revs in memory");
525 520 istat(ntcapacity, "node trie capacity");
526 521 istat(ntdepth, "node trie depth");
527 522 istat(ntlength, "node trie count");
528 523 istat(ntlookups, "node trie lookups");
529 524 istat(ntmisses, "node trie misses");
530 525 istat(ntrev, "node trie last rev scanned");
531 526 istat(ntsplits, "node trie splits");
532 527
533 528 #undef istat
534 529
535 530 return obj;
536 531
537 532 bail:
538 533 Py_XDECREF(obj);
539 534 return NULL;
540 535 }
541 536
542 537 static inline int nt_level(const char *node, int level)
543 538 {
544 539 int v = node[level>>1];
545 540 if (!(level & 1))
546 541 v >>= 4;
547 542 return v & 0xf;
548 543 }
549 544
550 545 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
551 546 {
552 547 int level, off;
553 548
554 549 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
555 550 return -1;
556 551
557 552 if (self->nt == NULL)
558 553 return -2;
559 554
560 555 for (level = off = 0; level < nodelen; level++) {
561 556 int k = nt_level(node, level);
562 557 nodetree *n = &self->nt[off];
563 558 int v = n->children[k];
564 559
565 560 if (v < 0) {
566 561 const char *n;
567 562 v = -v - 1;
568 563 n = index_node(self, v);
569 564 if (n == NULL)
570 565 return -2;
571 566 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
572 567 ? -2 : v;
573 568 }
574 569 if (v == 0)
575 570 return -2;
576 571 off = v;
577 572 }
578 573 return -2;
579 574 }
580 575
581 576 static int nt_new(indexObject *self)
582 577 {
583 578 if (self->ntlength == self->ntcapacity) {
584 579 self->ntcapacity *= 2;
585 580 self->nt = realloc(self->nt,
586 581 self->ntcapacity * sizeof(nodetree));
587 582 if (self->nt == NULL) {
588 583 PyErr_SetString(PyExc_MemoryError, "out of memory");
589 584 return -1;
590 585 }
591 586 memset(&self->nt[self->ntlength], 0,
592 587 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
593 588 }
594 589 return self->ntlength++;
595 590 }
596 591
597 592 static int nt_insert(indexObject *self, const char *node, int rev)
598 593 {
599 594 int level = 0;
600 595 int off = 0;
601 596
602 597 while (level < 20) {
603 598 int k = nt_level(node, level);
604 599 nodetree *n;
605 600 int v;
606 601
607 602 n = &self->nt[off];
608 603 v = n->children[k];
609 604
610 605 if (v == 0) {
611 606 n->children[k] = -rev - 1;
612 607 return 0;
613 608 }
614 609 if (v < 0) {
615 610 const char *oldnode = index_node(self, -v - 1);
616 611 int noff;
617 612
618 613 if (!oldnode || !memcmp(oldnode, node, 20)) {
619 614 n->children[k] = -rev - 1;
620 615 return 0;
621 616 }
622 617 noff = nt_new(self);
623 618 if (noff == -1)
624 619 return -1;
625 620 /* self->nt may have been changed by realloc */
626 621 self->nt[off].children[k] = noff;
627 622 off = noff;
628 623 n = &self->nt[off];
629 624 n->children[nt_level(oldnode, ++level)] = v;
630 625 if (level > self->ntdepth)
631 626 self->ntdepth = level;
632 627 self->ntsplits += 1;
633 628 } else {
634 629 level += 1;
635 630 off = v;
636 631 }
637 632 }
638 633
639 634 return -1;
640 635 }
641 636
642 637 /*
643 638 * Return values:
644 639 *
645 640 * -3: error (exception set)
646 641 * -2: not found (no exception set)
647 642 * rest: valid rev
648 643 */
649 644 static int index_find_node(indexObject *self,
650 645 const char *node, Py_ssize_t nodelen)
651 646 {
652 647 int rev;
653 648
654 649 self->ntlookups++;
655 650 rev = nt_find(self, node, nodelen);
656 651 if (rev >= -1)
657 652 return rev;
658 653
659 654 if (self->nt == NULL) {
660 655 self->ntcapacity = self->raw_length < 4
661 656 ? 4 : self->raw_length / 2;
662 657 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
663 658 if (self->nt == NULL) {
664 659 PyErr_SetString(PyExc_MemoryError, "out of memory");
665 660 return -3;
666 661 }
667 662 self->ntlength = 1;
668 663 self->ntrev = (int)index_length(self) - 1;
669 664 self->ntlookups = 1;
670 665 self->ntmisses = 0;
671 666 }
672 667
673 668 /*
674 669 * For the first handful of lookups, we scan the entire index,
675 670 * and cache only the matching nodes. This optimizes for cases
676 671 * like "hg tip", where only a few nodes are accessed.
677 672 *
678 673 * After that, we cache every node we visit, using a single
679 674 * scan amortized over multiple lookups. This gives the best
680 675 * bulk performance, e.g. for "hg log".
681 676 */
682 677 if (self->ntmisses++ < 4) {
683 678 for (rev = self->ntrev - 1; rev >= 0; rev--) {
684 679 const char *n = index_node(self, rev);
685 680 if (n == NULL)
686 681 return -2;
687 682 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
688 683 if (nt_insert(self, n, rev) == -1)
689 684 return -3;
690 685 break;
691 686 }
692 687 }
693 688 } else {
694 689 for (rev = self->ntrev - 1; rev >= 0; rev--) {
695 690 const char *n = index_node(self, rev);
696 691 if (n == NULL)
697 692 return -2;
698 693 if (nt_insert(self, n, rev) == -1)
699 694 return -3;
700 695 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
701 696 break;
702 697 }
703 698 }
704 699 self->ntrev = rev;
705 700 }
706 701
707 702 if (rev >= 0)
708 703 return rev;
709 704 return -2;
710 705 }
711 706
712 707 static PyObject *raise_revlog_error(void)
713 708 {
714 709 static PyObject *errclass;
715 710 PyObject *mod = NULL, *errobj;
716 711
717 712 if (errclass == NULL) {
718 713 PyObject *dict;
719 714
720 715 mod = PyImport_ImportModule("mercurial.error");
721 716 if (mod == NULL)
722 717 goto classfail;
723 718
724 719 dict = PyModule_GetDict(mod);
725 720 if (dict == NULL)
726 721 goto classfail;
727 722
728 723 errclass = PyDict_GetItemString(dict, "RevlogError");
729 724 if (errclass == NULL) {
730 725 PyErr_SetString(PyExc_SystemError,
731 726 "could not find RevlogError");
732 727 goto classfail;
733 728 }
734 729 Py_INCREF(errclass);
735 730 }
736 731
737 732 errobj = PyObject_CallFunction(errclass, NULL);
738 733 if (errobj == NULL)
739 734 return NULL;
740 735 PyErr_SetObject(errclass, errobj);
741 736 return errobj;
742 737
743 738 classfail:
744 739 Py_XDECREF(mod);
745 740 return NULL;
746 741 }
747 742
748 743 static PyObject *index_getitem(indexObject *self, PyObject *value)
749 744 {
750 745 char *node;
751 746 Py_ssize_t nodelen;
752 747 int rev;
753 748
754 749 if (PyInt_Check(value))
755 750 return index_get(self, PyInt_AS_LONG(value));
756 751
757 752 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
758 753 return NULL;
759 754 rev = index_find_node(self, node, nodelen);
760 755 if (rev >= -1)
761 756 return PyInt_FromLong(rev);
762 757 if (rev == -2)
763 758 raise_revlog_error();
764 759 return NULL;
765 760 }
766 761
767 762 static PyObject *index_m_get(indexObject *self, PyObject *args)
768 763 {
769 764 char *node;
770 765 int nodelen, rev;
771 766
772 767 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
773 768 return NULL;
774 769
775 770 rev = index_find_node(self, node, nodelen);
776 771 if (rev == -3)
777 772 return NULL;
778 773 if (rev == -2)
779 774 Py_RETURN_NONE;
780 775 return PyInt_FromLong(rev);
781 776 }
782 777
783 778 static int index_contains(indexObject *self, PyObject *value)
784 779 {
785 780 char *node;
786 781 Py_ssize_t nodelen;
787 782
788 783 if (PyInt_Check(value)) {
789 784 long rev = PyInt_AS_LONG(value);
790 785 return rev >= -1 && rev < index_length(self);
791 786 }
792 787
793 788 if (!PyString_Check(value))
794 789 return 0;
795 790
796 791 node = PyString_AS_STRING(value);
797 792 nodelen = PyString_GET_SIZE(value);
798 793
799 794 switch (index_find_node(self, node, nodelen)) {
800 795 case -3:
801 796 return -1;
802 797 case -2:
803 798 return 0;
804 799 default:
805 800 return 1;
806 801 }
807 802 }
808 803
809 804 /*
810 805 * Invalidate any trie entries introduced by added revs.
811 806 */
812 807 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
813 808 {
814 809 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
815 810
816 811 for (i = start; i < len; i++) {
817 812 PyObject *tuple = PyList_GET_ITEM(self->added, i);
818 813 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
819 814
820 815 nt_insert(self, PyString_AS_STRING(node), -1);
821 816 }
822 817
823 818 if (start == 0) {
824 819 Py_DECREF(self->added);
825 820 self->added = NULL;
826 821 }
827 822 }
828 823
829 824 /*
830 825 * Delete a numeric range of revs, which must be at the end of the
831 826 * range, but exclude the sentinel nullid entry.
832 827 */
833 828 static int index_slice_del(indexObject *self, PyObject *item)
834 829 {
835 830 Py_ssize_t start, stop, step, slicelength;
836 831 Py_ssize_t length = index_length(self);
837 832
838 833 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
839 834 &start, &stop, &step, &slicelength) < 0)
840 835 return -1;
841 836
842 837 if (slicelength <= 0)
843 838 return 0;
844 839
845 840 if ((step < 0 && start < stop) || (step > 0 && start > stop))
846 841 stop = start;
847 842
848 843 if (step < 0) {
849 844 stop = start + 1;
850 845 start = stop + step*(slicelength - 1) - 1;
851 846 step = -step;
852 847 }
853 848
854 849 if (step != 1) {
855 850 PyErr_SetString(PyExc_ValueError,
856 851 "revlog index delete requires step size of 1");
857 852 return -1;
858 853 }
859 854
860 855 if (stop != length - 1) {
861 856 PyErr_SetString(PyExc_IndexError,
862 857 "revlog index deletion indices are invalid");
863 858 return -1;
864 859 }
865 860
866 861 if (start < self->length - 1) {
867 862 if (self->nt) {
868 863 Py_ssize_t i;
869 864
870 865 for (i = start + 1; i < self->length - 1; i++) {
871 866 const char *node = index_node(self, i);
872 867
873 868 if (node)
874 869 nt_insert(self, node, -1);
875 870 }
876 871 if (self->added)
877 872 nt_invalidate_added(self, 0);
878 873 if (self->ntrev > start)
879 874 self->ntrev = (int)start;
880 875 }
881 876 self->length = start + 1;
882 877 return 0;
883 878 }
884 879
885 880 if (self->nt) {
886 881 nt_invalidate_added(self, start - self->length + 1);
887 882 if (self->ntrev > start)
888 883 self->ntrev = (int)start;
889 884 }
890 885 return self->added
891 886 ? PyList_SetSlice(self->added, start - self->length + 1,
892 887 PyList_GET_SIZE(self->added), NULL)
893 888 : 0;
894 889 }
895 890
896 891 /*
897 892 * Supported ops:
898 893 *
899 894 * slice deletion
900 895 * string assignment (extend node->rev mapping)
901 896 * string deletion (shrink node->rev mapping)
902 897 */
903 898 static int index_assign_subscript(indexObject *self, PyObject *item,
904 899 PyObject *value)
905 900 {
906 901 char *node;
907 902 Py_ssize_t nodelen;
908 903 long rev;
909 904
910 905 if (PySlice_Check(item) && value == NULL)
911 906 return index_slice_del(self, item);
912 907
913 908 if (node_check(item, &node, &nodelen) == -1)
914 909 return -1;
915 910
916 911 if (value == NULL)
917 912 return self->nt ? nt_insert(self, node, -1) : 0;
918 913 rev = PyInt_AsLong(value);
919 914 if (rev > INT_MAX || rev < 0) {
920 915 if (!PyErr_Occurred())
921 916 PyErr_SetString(PyExc_ValueError, "rev out of range");
922 917 return -1;
923 918 }
924 919 return nt_insert(self, node, (int)rev);
925 920 }
926 921
927 922 /*
928 923 * Find all RevlogNG entries in an index that has inline data. Update
929 924 * the optional "offsets" table with those entries.
930 925 */
931 926 static long inline_scan(indexObject *self, const char **offsets)
932 927 {
933 928 const char *data = PyString_AS_STRING(self->data);
934 929 const char *end = data + PyString_GET_SIZE(self->data);
935 930 const long hdrsize = 64;
936 931 long incr = hdrsize;
937 932 Py_ssize_t len = 0;
938 933
939 934 while (data + hdrsize <= end) {
940 935 uint32_t comp_len;
941 936 const char *old_data;
942 937 /* 3rd element of header is length of compressed inline data */
943 memcpy(&comp_len, data + 8, sizeof(uint32_t));
944 incr = hdrsize + ntohl(comp_len);
938 comp_len = getbe32(data + 8);
939 incr = hdrsize + comp_len;
945 940 if (incr < hdrsize)
946 941 break;
947 942 if (offsets)
948 943 offsets[len] = data;
949 944 len++;
950 945 old_data = data;
951 946 data += incr;
952 947 if (data <= old_data)
953 948 break;
954 949 }
955 950
956 951 if (data != end && data + hdrsize != end) {
957 952 if (!PyErr_Occurred())
958 953 PyErr_SetString(PyExc_ValueError, "corrupt index file");
959 954 return -1;
960 955 }
961 956
962 957 return len;
963 958 }
964 959
965 960 static int index_real_init(indexObject *self, const char *data, int size,
966 961 PyObject *inlined_obj, PyObject *data_obj)
967 962 {
968 963 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
969 964 self->data = data_obj;
970 965 self->cache = NULL;
971 966
972 967 self->added = NULL;
973 968 self->offsets = NULL;
974 969 self->nt = NULL;
975 970 self->ntlength = self->ntcapacity = 0;
976 971 self->ntdepth = self->ntsplits = 0;
977 972 self->ntlookups = self->ntmisses = 0;
978 973 self->ntrev = -1;
979 974 Py_INCREF(self->data);
980 975
981 976 if (self->inlined) {
982 977 long len = inline_scan(self, NULL);
983 978 if (len == -1)
984 979 goto bail;
985 980 self->raw_length = len;
986 981 self->length = len + 1;
987 982 } else {
988 983 if (size % 64) {
989 984 PyErr_SetString(PyExc_ValueError, "corrupt index file");
990 985 goto bail;
991 986 }
992 987 self->raw_length = size / 64;
993 988 self->length = self->raw_length + 1;
994 989 }
995 990
996 991 return 0;
997 992 bail:
998 993 return -1;
999 994 }
1000 995
1001 996 static int index_init(indexObject *self, PyObject *args, PyObject *kwds)
1002 997 {
1003 998 const char *data;
1004 999 int size;
1005 1000 PyObject *inlined_obj;
1006 1001
1007 1002 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
1008 1003 return -1;
1009 1004
1010 1005 return index_real_init(self, data, size, inlined_obj,
1011 1006 PyTuple_GET_ITEM(args, 0));
1012 1007 }
1013 1008
1014 1009 static PyObject *index_nodemap(indexObject *self)
1015 1010 {
1016 1011 return (PyObject *)self;
1017 1012 }
1018 1013
1019 1014 static void index_dealloc(indexObject *self)
1020 1015 {
1021 1016 _index_clearcaches(self);
1022 1017 Py_DECREF(self->data);
1023 1018 Py_XDECREF(self->added);
1024 1019 PyObject_Del(self);
1025 1020 }
1026 1021
1027 1022 static PySequenceMethods index_sequence_methods = {
1028 1023 (lenfunc)index_length, /* sq_length */
1029 1024 0, /* sq_concat */
1030 1025 0, /* sq_repeat */
1031 1026 (ssizeargfunc)index_get, /* sq_item */
1032 1027 0, /* sq_slice */
1033 1028 0, /* sq_ass_item */
1034 1029 0, /* sq_ass_slice */
1035 1030 (objobjproc)index_contains, /* sq_contains */
1036 1031 };
1037 1032
1038 1033 static PyMappingMethods index_mapping_methods = {
1039 1034 (lenfunc)index_length, /* mp_length */
1040 1035 (binaryfunc)index_getitem, /* mp_subscript */
1041 1036 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1042 1037 };
1043 1038
1044 1039 static PyMethodDef index_methods[] = {
1045 1040 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1046 1041 "clear the index caches"},
1047 1042 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1048 1043 "get an index entry"},
1049 1044 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1050 1045 "insert an index entry"},
1051 1046 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1052 1047 "stats for the index"},
1053 1048 {NULL} /* Sentinel */
1054 1049 };
1055 1050
1056 1051 static PyGetSetDef index_getset[] = {
1057 1052 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1058 1053 {NULL} /* Sentinel */
1059 1054 };
1060 1055
1061 1056 static PyTypeObject indexType = {
1062 1057 PyObject_HEAD_INIT(NULL)
1063 1058 0, /* ob_size */
1064 1059 "parsers.index", /* tp_name */
1065 1060 sizeof(indexObject), /* tp_basicsize */
1066 1061 0, /* tp_itemsize */
1067 1062 (destructor)index_dealloc, /* tp_dealloc */
1068 1063 0, /* tp_print */
1069 1064 0, /* tp_getattr */
1070 1065 0, /* tp_setattr */
1071 1066 0, /* tp_compare */
1072 1067 0, /* tp_repr */
1073 1068 0, /* tp_as_number */
1074 1069 &index_sequence_methods, /* tp_as_sequence */
1075 1070 &index_mapping_methods, /* tp_as_mapping */
1076 1071 0, /* tp_hash */
1077 1072 0, /* tp_call */
1078 1073 0, /* tp_str */
1079 1074 0, /* tp_getattro */
1080 1075 0, /* tp_setattro */
1081 1076 0, /* tp_as_buffer */
1082 1077 Py_TPFLAGS_DEFAULT, /* tp_flags */
1083 1078 "revlog index", /* tp_doc */
1084 1079 0, /* tp_traverse */
1085 1080 0, /* tp_clear */
1086 1081 0, /* tp_richcompare */
1087 1082 0, /* tp_weaklistoffset */
1088 1083 0, /* tp_iter */
1089 1084 0, /* tp_iternext */
1090 1085 index_methods, /* tp_methods */
1091 1086 0, /* tp_members */
1092 1087 index_getset, /* tp_getset */
1093 1088 0, /* tp_base */
1094 1089 0, /* tp_dict */
1095 1090 0, /* tp_descr_get */
1096 1091 0, /* tp_descr_set */
1097 1092 0, /* tp_dictoffset */
1098 1093 (initproc)index_init, /* tp_init */
1099 1094 0, /* tp_alloc */
1100 1095 PyType_GenericNew, /* tp_new */
1101 1096 };
1102 1097
1103 1098 /*
1104 1099 * returns a tuple of the form (index, index, cache) with elements as
1105 1100 * follows:
1106 1101 *
1107 1102 * index: an index object that lazily parses RevlogNG records
1108 1103 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1109 1104 *
1110 1105 * added complications are for backwards compatibility
1111 1106 */
1112 1107 static PyObject *parse_index2(PyObject *self, PyObject *args)
1113 1108 {
1114 1109 const char *data;
1115 1110 int size, ret;
1116 1111 PyObject *inlined_obj, *tuple = NULL, *cache = NULL;
1117 1112 indexObject *idx;
1118 1113
1119 1114 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
1120 1115 return NULL;
1121 1116
1122 1117 idx = PyObject_New(indexObject, &indexType);
1123 1118
1124 1119 if (idx == NULL)
1125 1120 goto bail;
1126 1121
1127 1122 ret = index_real_init(idx, data, size, inlined_obj,
1128 1123 PyTuple_GET_ITEM(args, 0));
1129 1124 if (ret)
1130 1125 goto bail;
1131 1126
1132 1127 if (idx->inlined) {
1133 1128 Py_INCREF(idx->data);
1134 1129 cache = Py_BuildValue("iO", 0, idx->data);
1135 1130 if (cache == NULL)
1136 1131 goto bail;
1137 1132 } else {
1138 1133 cache = Py_None;
1139 1134 Py_INCREF(cache);
1140 1135 }
1141 1136
1142 1137 Py_INCREF(idx);
1143 1138
1144 1139 tuple = Py_BuildValue("NN", idx, cache);
1145 1140 if (!tuple)
1146 1141 goto bail;
1147 1142 return tuple;
1148 1143
1149 1144 bail:
1150 1145 Py_XDECREF(idx);
1151 1146 Py_XDECREF(cache);
1152 1147 Py_XDECREF(tuple);
1153 1148 return NULL;
1154 1149 }
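The comment above this function documents the (index, cache) return shape. As a rough illustration only, the Python sketch below shows how a caller might consume that pair; it assumes the extension is built and importable as mercurial.parsers, and the names load_index and revlog_data are hypothetical, not taken from this change.

    # Illustrative sketch (not part of this change): consuming parse_index2.
    from mercurial import parsers

    def load_index(revlog_data, inline):
        # parse_index2 returns a 2-tuple: the lazily-parsed index object and,
        # for inlined revlogs, a (0, index_file_content) chunk cache
        # (None when the data is not inlined).
        index, cache = parsers.parse_index2(revlog_data, inline)
        # the index object doubles as its own nodemap (see the nodemap getter above)
        return index, index.nodemap, cache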
1155 1150
1156 1151 static char parsers_doc[] = "Efficient content parsing.";
1157 1152
1158 1153 static PyMethodDef methods[] = {
1159 1154 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1160 1155 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1161 1156 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1162 1157 {NULL, NULL}
1163 1158 };
1164 1159
1165 1160 static void module_init(PyObject *mod)
1166 1161 {
1167 1162 if (PyType_Ready(&indexType) < 0)
1168 1163 return;
1169 1164 Py_INCREF(&indexType);
1170 1165
1171 1166 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1172 1167
1173 1168 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1174 1169 -1, -1, -1, -1, nullid, 20);
1175 1170 if (nullentry)
1176 1171 PyObject_GC_UnTrack(nullentry);
1177 1172 }
1178 1173
1179 1174 #ifdef IS_PY3K
1180 1175 static struct PyModuleDef parsers_module = {
1181 1176 PyModuleDef_HEAD_INIT,
1182 1177 "parsers",
1183 1178 parsers_doc,
1184 1179 -1,
1185 1180 methods
1186 1181 };
1187 1182
1188 1183 PyMODINIT_FUNC PyInit_parsers(void)
1189 1184 {
1190 1185 PyObject *mod = PyModule_Create(&parsers_module);
1191 1186 module_init(mod);
1192 1187 return mod;
1193 1188 }
1194 1189 #else
1195 1190 PyMODINIT_FUNC initparsers(void)
1196 1191 {
1197 1192 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1198 1193 module_init(mod);
1199 1194 }
1200 1195 #endif
@@ -1,154 +1,165
1 1 /*
2 2 util.h - utility functions for interfacing with the various python APIs.
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_UTIL_H_
9 9 #define _HG_UTIL_H_
10 10
11 11 #if PY_MAJOR_VERSION >= 3
12 12
13 13 #define IS_PY3K
14 14 #define PyInt_FromLong PyLong_FromLong
15 15 #define PyInt_AsLong PyLong_AsLong
16 16
17 17 /*
18 18 Mapping of some of the python 2.x PyString* functions to py3k's PyUnicode.
19 19
20 20 The commented names below represent those that are present in the PyBytes
21 21 definitions for python < 2.6 (below in this file) that don't have a direct
22 22 implementation.
23 23 */
24 24
25 25 #define PyStringObject PyUnicodeObject
26 26 #define PyString_Type PyUnicode_Type
27 27
28 28 #define PyString_Check PyUnicode_Check
29 29 #define PyString_CheckExact PyUnicode_CheckExact
30 30 #define PyString_CHECK_INTERNED PyUnicode_CHECK_INTERNED
31 31 #define PyString_AS_STRING PyUnicode_AsLatin1String
32 32 #define PyString_GET_SIZE PyUnicode_GET_SIZE
33 33
34 34 #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
35 35 #define PyString_FromString PyUnicode_FromString
36 36 #define PyString_FromFormatV PyUnicode_FromFormatV
37 37 #define PyString_FromFormat PyUnicode_FromFormat
38 38 /* #define PyString_Size PyUnicode_GET_SIZE */
39 39 /* #define PyString_AsString */
40 40 /* #define PyString_Repr */
41 41 #define PyString_Concat PyUnicode_Concat
42 42 #define PyString_ConcatAndDel PyUnicode_AppendAndDel
43 43 #define _PyString_Resize PyUnicode_Resize
44 44 /* #define _PyString_Eq */
45 45 #define PyString_Format PyUnicode_Format
46 46 /* #define _PyString_FormatLong */
47 47 /* #define PyString_DecodeEscape */
48 48 #define _PyString_Join PyUnicode_Join
49 49 #define PyString_Decode PyUnicode_Decode
50 50 #define PyString_Encode PyUnicode_Encode
51 51 #define PyString_AsEncodedObject PyUnicode_AsEncodedObject
52 52 #define PyString_AsEncodedString PyUnicode_AsEncodedString
53 53 #define PyString_AsDecodedObject PyUnicode_AsDecodedObject
54 54 #define PyString_AsDecodedString PyUnicode_AsDecodedUnicode
55 55 /* #define PyString_AsStringAndSize */
56 56 #define _PyString_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping
57 57
58 58 #endif /* PY_MAJOR_VERSION */
59 59
60 60 /* Backports from 2.6 */
61 61 #if PY_VERSION_HEX < 0x02060000
62 62
63 63 #define Py_TYPE(ob) (ob)->ob_type
64 64 #define Py_SIZE(ob) (ob)->ob_size
65 65 #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
66 66
67 67 /* Shamelessly stolen from bytesobject.h */
68 68 #define PyBytesObject PyStringObject
69 69 #define PyBytes_Type PyString_Type
70 70
71 71 #define PyBytes_Check PyString_Check
72 72 #define PyBytes_CheckExact PyString_CheckExact
73 73 #define PyBytes_CHECK_INTERNED PyString_CHECK_INTERNED
74 74 #define PyBytes_AS_STRING PyString_AS_STRING
75 75 #define PyBytes_GET_SIZE PyString_GET_SIZE
76 76 #define Py_TPFLAGS_BYTES_SUBCLASS Py_TPFLAGS_STRING_SUBCLASS
77 77
78 78 #define PyBytes_FromStringAndSize PyString_FromStringAndSize
79 79 #define PyBytes_FromString PyString_FromString
80 80 #define PyBytes_FromFormatV PyString_FromFormatV
81 81 #define PyBytes_FromFormat PyString_FromFormat
82 82 #define PyBytes_Size PyString_Size
83 83 #define PyBytes_AsString PyString_AsString
84 84 #define PyBytes_Repr PyString_Repr
85 85 #define PyBytes_Concat PyString_Concat
86 86 #define PyBytes_ConcatAndDel PyString_ConcatAndDel
87 87 #define _PyBytes_Resize _PyString_Resize
88 88 #define _PyBytes_Eq _PyString_Eq
89 89 #define PyBytes_Format PyString_Format
90 90 #define _PyBytes_FormatLong _PyString_FormatLong
91 91 #define PyBytes_DecodeEscape PyString_DecodeEscape
92 92 #define _PyBytes_Join _PyString_Join
93 93 #define PyBytes_Decode PyString_Decode
94 94 #define PyBytes_Encode PyString_Encode
95 95 #define PyBytes_AsEncodedObject PyString_AsEncodedObject
96 96 #define PyBytes_AsEncodedString PyString_AsEncodedString
97 97 #define PyBytes_AsDecodedObject PyString_AsDecodedObject
98 98 #define PyBytes_AsDecodedString PyString_AsDecodedString
99 99 #define PyBytes_AsStringAndSize PyString_AsStringAndSize
100 100 #define _PyBytes_InsertThousandsGrouping _PyString_InsertThousandsGrouping
101 101
102 102 #endif /* PY_VERSION_HEX */
103 103
104 104 #if (PY_VERSION_HEX < 0x02050000)
105 105 /* Definitions to get compatibility with python 2.4 and earlier which
106 106 does not have Py_ssize_t. See also PEP 353.
107 107 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
108 108 */
109 109 typedef int Py_ssize_t;
110 110 typedef Py_ssize_t (*lenfunc)(PyObject *);
111 111 typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t);
112 112
113 113 #if !defined(PY_SSIZE_T_MIN)
114 114 #define PY_SSIZE_T_MAX INT_MAX
115 115 #define PY_SSIZE_T_MIN INT_MIN
116 116 #endif
117 117 #endif
118 118
119 119 #ifdef _WIN32
120 120 #ifdef _MSC_VER
121 121 /* msvc 6.0 has problems */
122 122 #define inline __inline
123 123 typedef unsigned long uint32_t;
124 124 typedef unsigned __int64 uint64_t;
125 125 #else
126 126 #include <stdint.h>
127 127 #endif
128 static uint32_t ntohl(uint32_t x)
129 {
130 return ((x & 0x000000ffUL) << 24) |
131 ((x & 0x0000ff00UL) << 8) |
132 ((x & 0x00ff0000UL) >> 8) |
133 ((x & 0xff000000UL) >> 24);
134 }
135 128 #else
136 129 /* not windows */
137 130 #include <sys/types.h>
138 131 #if defined __BEOS__ && !defined __HAIKU__
139 132 #include <ByteOrder.h>
140 133 #else
141 134 #include <arpa/inet.h>
142 135 #endif
143 136 #include <inttypes.h>
144 137 #endif
145 138
146 139 #if defined __hpux || defined __SUNPRO_C || defined _AIX
147 140 #define inline
148 141 #endif
149 142
150 143 #ifdef __linux
151 144 #define inline __inline
152 145 #endif
153 146
147 static inline uint32_t getbe32(const char *c)
148 {
149 const unsigned char *d = (const unsigned char *)c;
150
151 return ((d[0] << 24) |
152 (d[1] << 16) |
153 (d[2] << 8) |
154 (d[3]));
155 }
156
157 static inline void putbe32(uint32_t x, char *c)
158 {
159 c[0] = (x >> 24) & 0xff;
160 c[1] = (x >> 16) & 0xff;
161 c[2] = (x >> 8) & 0xff;
162 c[3] = (x) & 0xff;
163 }
164
154 165 #endif /* _HG_UTIL_H_ */
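The getbe32/putbe32 helpers added at the end of this header assemble and scatter the four bytes of a 32-bit value explicitly, so callers get big-endian (on-disk) byte order regardless of the host CPU, and the Windows-only ntohl fallback removed above is no longer needed. As a hedged, illustration-only mirror of that byte layout, the Python sketch below uses the standard struct module; none of it is part of this change.

    # Illustrative sketch: the same big-endian layout via struct (">I").
    import struct

    def getbe32(data, offset=0):
        # read four bytes at offset as an unsigned 32-bit big-endian integer
        return struct.unpack(">I", data[offset:offset + 4])[0]

    def putbe32(value):
        # return the four-byte big-endian encoding of value
        return struct.pack(">I", value)

    assert putbe32(0x01020304) == b"\x01\x02\x03\x04"
    assert getbe32(putbe32(0xdeadbeef)) == 0xdeadbeef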