##// END OF EJS Templates
util.h: replace ntohl/htonl with get/putbe32
Matt Mackall -
r16437:d126a0d1 default
parent child Browse files
Show More
@@ -1,467 +1,465
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15 #include <limits.h>
15 #include <limits.h>
16
16
17 #include "util.h"
17 #include "util.h"
18
18
19 struct line {
19 struct line {
20 int hash, len, n, e;
20 int hash, len, n, e;
21 const char *l;
21 const char *l;
22 };
22 };
23
23
24 struct pos {
24 struct pos {
25 int pos, len;
25 int pos, len;
26 };
26 };
27
27
28 struct hunk;
28 struct hunk;
29 struct hunk {
29 struct hunk {
30 int a1, a2, b1, b2;
30 int a1, a2, b1, b2;
31 struct hunk *next;
31 struct hunk *next;
32 };
32 };
33
33
34 static int splitlines(const char *a, int len, struct line **lr)
34 static int splitlines(const char *a, int len, struct line **lr)
35 {
35 {
36 unsigned hash;
36 unsigned hash;
37 int i;
37 int i;
38 const char *p, *b = a;
38 const char *p, *b = a;
39 const char * const plast = a + len - 1;
39 const char * const plast = a + len - 1;
40 struct line *l;
40 struct line *l;
41
41
42 /* count the lines */
42 /* count the lines */
43 i = 1; /* extra line for sentinel */
43 i = 1; /* extra line for sentinel */
44 for (p = a; p < a + len; p++)
44 for (p = a; p < a + len; p++)
45 if (*p == '\n' || p == plast)
45 if (*p == '\n' || p == plast)
46 i++;
46 i++;
47
47
48 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
48 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
49 if (!l)
49 if (!l)
50 return -1;
50 return -1;
51
51
52 /* build the line array and calculate hashes */
52 /* build the line array and calculate hashes */
53 hash = 0;
53 hash = 0;
54 for (p = a; p < a + len; p++) {
54 for (p = a; p < a + len; p++) {
55 /* Leonid Yuriev's hash */
55 /* Leonid Yuriev's hash */
56 hash = (hash * 1664525) + (unsigned char)*p + 1013904223;
56 hash = (hash * 1664525) + (unsigned char)*p + 1013904223;
57
57
58 if (*p == '\n' || p == plast) {
58 if (*p == '\n' || p == plast) {
59 l->hash = hash;
59 l->hash = hash;
60 hash = 0;
60 hash = 0;
61 l->len = p - b + 1;
61 l->len = p - b + 1;
62 l->l = b;
62 l->l = b;
63 l->n = INT_MAX;
63 l->n = INT_MAX;
64 l++;
64 l++;
65 b = p + 1;
65 b = p + 1;
66 }
66 }
67 }
67 }
68
68
69 /* set up a sentinel */
69 /* set up a sentinel */
70 l->hash = 0;
70 l->hash = 0;
71 l->len = 0;
71 l->len = 0;
72 l->l = a + len;
72 l->l = a + len;
73 return i - 1;
73 return i - 1;
74 }
74 }
75
75
76 static inline int cmp(struct line *a, struct line *b)
76 static inline int cmp(struct line *a, struct line *b)
77 {
77 {
78 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
78 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
79 }
79 }
80
80
81 static int equatelines(struct line *a, int an, struct line *b, int bn)
81 static int equatelines(struct line *a, int an, struct line *b, int bn)
82 {
82 {
83 int i, j, buckets = 1, t, scale;
83 int i, j, buckets = 1, t, scale;
84 struct pos *h = NULL;
84 struct pos *h = NULL;
85
85
86 /* build a hash table of the next highest power of 2 */
86 /* build a hash table of the next highest power of 2 */
87 while (buckets < bn + 1)
87 while (buckets < bn + 1)
88 buckets *= 2;
88 buckets *= 2;
89
89
90 /* try to allocate a large hash table to avoid collisions */
90 /* try to allocate a large hash table to avoid collisions */
91 for (scale = 4; scale; scale /= 2) {
91 for (scale = 4; scale; scale /= 2) {
92 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
92 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
93 if (h)
93 if (h)
94 break;
94 break;
95 }
95 }
96
96
97 if (!h)
97 if (!h)
98 return 0;
98 return 0;
99
99
100 buckets = buckets * scale - 1;
100 buckets = buckets * scale - 1;
101
101
102 /* clear the hash table */
102 /* clear the hash table */
103 for (i = 0; i <= buckets; i++) {
103 for (i = 0; i <= buckets; i++) {
104 h[i].pos = INT_MAX;
104 h[i].pos = INT_MAX;
105 h[i].len = 0;
105 h[i].len = 0;
106 }
106 }
107
107
108 /* add lines to the hash table chains */
108 /* add lines to the hash table chains */
109 for (i = bn - 1; i >= 0; i--) {
109 for (i = bn - 1; i >= 0; i--) {
110 /* find the equivalence class */
110 /* find the equivalence class */
111 for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
111 for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
112 j = (j + 1) & buckets)
112 j = (j + 1) & buckets)
113 if (!cmp(b + i, b + h[j].pos))
113 if (!cmp(b + i, b + h[j].pos))
114 break;
114 break;
115
115
116 /* add to the head of the equivalence class */
116 /* add to the head of the equivalence class */
117 b[i].n = h[j].pos;
117 b[i].n = h[j].pos;
118 b[i].e = j;
118 b[i].e = j;
119 h[j].pos = i;
119 h[j].pos = i;
120 h[j].len++; /* keep track of popularity */
120 h[j].len++; /* keep track of popularity */
121 }
121 }
122
122
123 /* compute popularity threshold */
123 /* compute popularity threshold */
124 t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
124 t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
125
125
126 /* match items in a to their equivalence class in b */
126 /* match items in a to their equivalence class in b */
127 for (i = 0; i < an; i++) {
127 for (i = 0; i < an; i++) {
128 /* find the equivalence class */
128 /* find the equivalence class */
129 for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
129 for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
130 j = (j + 1) & buckets)
130 j = (j + 1) & buckets)
131 if (!cmp(a + i, b + h[j].pos))
131 if (!cmp(a + i, b + h[j].pos))
132 break;
132 break;
133
133
134 a[i].e = j; /* use equivalence class for quick compare */
134 a[i].e = j; /* use equivalence class for quick compare */
135 if (h[j].len <= t)
135 if (h[j].len <= t)
136 a[i].n = h[j].pos; /* point to head of match list */
136 a[i].n = h[j].pos; /* point to head of match list */
137 else
137 else
138 a[i].n = INT_MAX; /* too popular */
138 a[i].n = INT_MAX; /* too popular */
139 }
139 }
140
140
141 /* discard hash tables */
141 /* discard hash tables */
142 free(h);
142 free(h);
143 return 1;
143 return 1;
144 }
144 }
145
145
146 static int longest_match(struct line *a, struct line *b, struct pos *pos,
146 static int longest_match(struct line *a, struct line *b, struct pos *pos,
147 int a1, int a2, int b1, int b2, int *omi, int *omj)
147 int a1, int a2, int b1, int b2, int *omi, int *omj)
148 {
148 {
149 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
149 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
150
150
151 for (i = a1; i < a2; i++) {
151 for (i = a1; i < a2; i++) {
152 /* skip things before the current block */
152 /* skip things before the current block */
153 for (j = a[i].n; j < b1; j = b[j].n)
153 for (j = a[i].n; j < b1; j = b[j].n)
154 ;
154 ;
155
155
156 /* loop through all lines match a[i] in b */
156 /* loop through all lines match a[i] in b */
157 for (; j < b2; j = b[j].n) {
157 for (; j < b2; j = b[j].n) {
158 /* does this extend an earlier match? */
158 /* does this extend an earlier match? */
159 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
159 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
160 k = pos[j - 1].len + 1;
160 k = pos[j - 1].len + 1;
161 else
161 else
162 k = 1;
162 k = 1;
163 pos[j].pos = i;
163 pos[j].pos = i;
164 pos[j].len = k;
164 pos[j].len = k;
165
165
166 /* best match so far? */
166 /* best match so far? */
167 if (k > mk) {
167 if (k > mk) {
168 mi = i;
168 mi = i;
169 mj = j;
169 mj = j;
170 mk = k;
170 mk = k;
171 }
171 }
172 }
172 }
173 }
173 }
174
174
175 if (mk) {
175 if (mk) {
176 mi = mi - mk + 1;
176 mi = mi - mk + 1;
177 mj = mj - mk + 1;
177 mj = mj - mk + 1;
178 }
178 }
179
179
180 /* expand match to include neighboring popular lines */
180 /* expand match to include neighboring popular lines */
181 while (mi - mb > a1 && mj - mb > b1 &&
181 while (mi - mb > a1 && mj - mb > b1 &&
182 a[mi - mb - 1].e == b[mj - mb - 1].e)
182 a[mi - mb - 1].e == b[mj - mb - 1].e)
183 mb++;
183 mb++;
184 while (mi + mk < a2 && mj + mk < b2 &&
184 while (mi + mk < a2 && mj + mk < b2 &&
185 a[mi + mk].e == b[mj + mk].e)
185 a[mi + mk].e == b[mj + mk].e)
186 mk++;
186 mk++;
187
187
188 *omi = mi - mb;
188 *omi = mi - mb;
189 *omj = mj - mb;
189 *omj = mj - mb;
190
190
191 return mk + mb;
191 return mk + mb;
192 }
192 }
193
193
194 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
194 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
195 int a1, int a2, int b1, int b2, struct hunk *l)
195 int a1, int a2, int b1, int b2, struct hunk *l)
196 {
196 {
197 int i, j, k;
197 int i, j, k;
198
198
199 while (1) {
199 while (1) {
200 /* find the longest match in this chunk */
200 /* find the longest match in this chunk */
201 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
201 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
202 if (!k)
202 if (!k)
203 return l;
203 return l;
204
204
205 /* and recurse on the remaining chunks on either side */
205 /* and recurse on the remaining chunks on either side */
206 l = recurse(a, b, pos, a1, i, b1, j, l);
206 l = recurse(a, b, pos, a1, i, b1, j, l);
207 if (!l)
207 if (!l)
208 return NULL;
208 return NULL;
209
209
210 l->next = (struct hunk *)malloc(sizeof(struct hunk));
210 l->next = (struct hunk *)malloc(sizeof(struct hunk));
211 if (!l->next)
211 if (!l->next)
212 return NULL;
212 return NULL;
213
213
214 l = l->next;
214 l = l->next;
215 l->a1 = i;
215 l->a1 = i;
216 l->a2 = i + k;
216 l->a2 = i + k;
217 l->b1 = j;
217 l->b1 = j;
218 l->b2 = j + k;
218 l->b2 = j + k;
219 l->next = NULL;
219 l->next = NULL;
220
220
221 /* tail-recursion didn't happen, so do equivalent iteration */
221 /* tail-recursion didn't happen, so do equivalent iteration */
222 a1 = i + k;
222 a1 = i + k;
223 b1 = j + k;
223 b1 = j + k;
224 }
224 }
225 }
225 }
226
226
227 static int diff(struct line *a, int an, struct line *b, int bn,
227 static int diff(struct line *a, int an, struct line *b, int bn,
228 struct hunk *base)
228 struct hunk *base)
229 {
229 {
230 struct hunk *curr;
230 struct hunk *curr;
231 struct pos *pos;
231 struct pos *pos;
232 int t, count = 0;
232 int t, count = 0;
233
233
234 /* allocate and fill arrays */
234 /* allocate and fill arrays */
235 t = equatelines(a, an, b, bn);
235 t = equatelines(a, an, b, bn);
236 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
236 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
237
237
238 if (pos && t) {
238 if (pos && t) {
239 /* generate the matching block list */
239 /* generate the matching block list */
240
240
241 curr = recurse(a, b, pos, 0, an, 0, bn, base);
241 curr = recurse(a, b, pos, 0, an, 0, bn, base);
242 if (!curr)
242 if (!curr)
243 return -1;
243 return -1;
244
244
245 /* sentinel end hunk */
245 /* sentinel end hunk */
246 curr->next = (struct hunk *)malloc(sizeof(struct hunk));
246 curr->next = (struct hunk *)malloc(sizeof(struct hunk));
247 if (!curr->next)
247 if (!curr->next)
248 return -1;
248 return -1;
249 curr = curr->next;
249 curr = curr->next;
250 curr->a1 = curr->a2 = an;
250 curr->a1 = curr->a2 = an;
251 curr->b1 = curr->b2 = bn;
251 curr->b1 = curr->b2 = bn;
252 curr->next = NULL;
252 curr->next = NULL;
253 }
253 }
254
254
255 free(pos);
255 free(pos);
256
256
257 /* normalize the hunk list, try to push each hunk towards the end */
257 /* normalize the hunk list, try to push each hunk towards the end */
258 for (curr = base->next; curr; curr = curr->next) {
258 for (curr = base->next; curr; curr = curr->next) {
259 struct hunk *next = curr->next;
259 struct hunk *next = curr->next;
260 int shift = 0;
260 int shift = 0;
261
261
262 if (!next)
262 if (!next)
263 break;
263 break;
264
264
265 if (curr->a2 == next->a1)
265 if (curr->a2 == next->a1)
266 while (curr->a2 + shift < an && curr->b2 + shift < bn
266 while (curr->a2 + shift < an && curr->b2 + shift < bn
267 && !cmp(a + curr->a2 + shift,
267 && !cmp(a + curr->a2 + shift,
268 b + curr->b2 + shift))
268 b + curr->b2 + shift))
269 shift++;
269 shift++;
270 else if (curr->b2 == next->b1)
270 else if (curr->b2 == next->b1)
271 while (curr->b2 + shift < bn && curr->a2 + shift < an
271 while (curr->b2 + shift < bn && curr->a2 + shift < an
272 && !cmp(b + curr->b2 + shift,
272 && !cmp(b + curr->b2 + shift,
273 a + curr->a2 + shift))
273 a + curr->a2 + shift))
274 shift++;
274 shift++;
275 if (!shift)
275 if (!shift)
276 continue;
276 continue;
277 curr->b2 += shift;
277 curr->b2 += shift;
278 next->b1 += shift;
278 next->b1 += shift;
279 curr->a2 += shift;
279 curr->a2 += shift;
280 next->a1 += shift;
280 next->a1 += shift;
281 }
281 }
282
282
283 for (curr = base->next; curr; curr = curr->next)
283 for (curr = base->next; curr; curr = curr->next)
284 count++;
284 count++;
285 return count;
285 return count;
286 }
286 }
287
287
288 static void freehunks(struct hunk *l)
288 static void freehunks(struct hunk *l)
289 {
289 {
290 struct hunk *n;
290 struct hunk *n;
291 for (; l; l = n) {
291 for (; l; l = n) {
292 n = l->next;
292 n = l->next;
293 free(l);
293 free(l);
294 }
294 }
295 }
295 }
296
296
297 static PyObject *blocks(PyObject *self, PyObject *args)
297 static PyObject *blocks(PyObject *self, PyObject *args)
298 {
298 {
299 PyObject *sa, *sb, *rl = NULL, *m;
299 PyObject *sa, *sb, *rl = NULL, *m;
300 struct line *a, *b;
300 struct line *a, *b;
301 struct hunk l, *h;
301 struct hunk l, *h;
302 int an, bn, count, pos = 0;
302 int an, bn, count, pos = 0;
303
303
304 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
304 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
305 return NULL;
305 return NULL;
306
306
307 an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
307 an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
308 bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
308 bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
309
309
310 if (!a || !b)
310 if (!a || !b)
311 goto nomem;
311 goto nomem;
312
312
313 l.next = NULL;
313 l.next = NULL;
314 count = diff(a, an, b, bn, &l);
314 count = diff(a, an, b, bn, &l);
315 if (count < 0)
315 if (count < 0)
316 goto nomem;
316 goto nomem;
317
317
318 rl = PyList_New(count);
318 rl = PyList_New(count);
319 if (!rl)
319 if (!rl)
320 goto nomem;
320 goto nomem;
321
321
322 for (h = l.next; h; h = h->next) {
322 for (h = l.next; h; h = h->next) {
323 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
323 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
324 PyList_SetItem(rl, pos, m);
324 PyList_SetItem(rl, pos, m);
325 pos++;
325 pos++;
326 }
326 }
327
327
328 nomem:
328 nomem:
329 free(a);
329 free(a);
330 free(b);
330 free(b);
331 freehunks(l.next);
331 freehunks(l.next);
332 return rl ? rl : PyErr_NoMemory();
332 return rl ? rl : PyErr_NoMemory();
333 }
333 }
334
334
335 static PyObject *bdiff(PyObject *self, PyObject *args)
335 static PyObject *bdiff(PyObject *self, PyObject *args)
336 {
336 {
337 char *sa, *sb, *rb;
337 char *sa, *sb, *rb;
338 PyObject *result = NULL;
338 PyObject *result = NULL;
339 struct line *al, *bl;
339 struct line *al, *bl;
340 struct hunk l, *h;
340 struct hunk l, *h;
341 uint32_t encode[3];
342 int an, bn, len = 0, la, lb, count;
341 int an, bn, len = 0, la, lb, count;
343
342
344 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
343 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
345 return NULL;
344 return NULL;
346
345
347 an = splitlines(sa, la, &al);
346 an = splitlines(sa, la, &al);
348 bn = splitlines(sb, lb, &bl);
347 bn = splitlines(sb, lb, &bl);
349 if (!al || !bl)
348 if (!al || !bl)
350 goto nomem;
349 goto nomem;
351
350
352 l.next = NULL;
351 l.next = NULL;
353 count = diff(al, an, bl, bn, &l);
352 count = diff(al, an, bl, bn, &l);
354 if (count < 0)
353 if (count < 0)
355 goto nomem;
354 goto nomem;
356
355
357 /* calculate length of output */
356 /* calculate length of output */
358 la = lb = 0;
357 la = lb = 0;
359 for (h = l.next; h; h = h->next) {
358 for (h = l.next; h; h = h->next) {
360 if (h->a1 != la || h->b1 != lb)
359 if (h->a1 != la || h->b1 != lb)
361 len += 12 + bl[h->b1].l - bl[lb].l;
360 len += 12 + bl[h->b1].l - bl[lb].l;
362 la = h->a2;
361 la = h->a2;
363 lb = h->b2;
362 lb = h->b2;
364 }
363 }
365
364
366 result = PyBytes_FromStringAndSize(NULL, len);
365 result = PyBytes_FromStringAndSize(NULL, len);
367
366
368 if (!result)
367 if (!result)
369 goto nomem;
368 goto nomem;
370
369
371 /* build binary patch */
370 /* build binary patch */
372 rb = PyBytes_AsString(result);
371 rb = PyBytes_AsString(result);
373 la = lb = 0;
372 la = lb = 0;
374
373
375 for (h = l.next; h; h = h->next) {
374 for (h = l.next; h; h = h->next) {
376 if (h->a1 != la || h->b1 != lb) {
375 if (h->a1 != la || h->b1 != lb) {
377 len = bl[h->b1].l - bl[lb].l;
376 len = bl[h->b1].l - bl[lb].l;
378 encode[0] = htonl(al[la].l - al->l);
377 putbe32(al[la].l - al->l, rb);
379 encode[1] = htonl(al[h->a1].l - al->l);
378 putbe32(al[h->a1].l - al->l, rb + 4);
380 encode[2] = htonl(len);
379 putbe32(len, rb + 8);
381 memcpy(rb, encode, 12);
382 memcpy(rb + 12, bl[lb].l, len);
380 memcpy(rb + 12, bl[lb].l, len);
383 rb += 12 + len;
381 rb += 12 + len;
384 }
382 }
385 la = h->a2;
383 la = h->a2;
386 lb = h->b2;
384 lb = h->b2;
387 }
385 }
388
386
389 nomem:
387 nomem:
390 free(al);
388 free(al);
391 free(bl);
389 free(bl);
392 freehunks(l.next);
390 freehunks(l.next);
393 return result ? result : PyErr_NoMemory();
391 return result ? result : PyErr_NoMemory();
394 }
392 }
395
393
396 /*
394 /*
397 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
395 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
398 * reduce whitespace sequences to a single space and trim remaining whitespace
396 * reduce whitespace sequences to a single space and trim remaining whitespace
399 * from end of lines.
397 * from end of lines.
400 */
398 */
401 static PyObject *fixws(PyObject *self, PyObject *args)
399 static PyObject *fixws(PyObject *self, PyObject *args)
402 {
400 {
403 PyObject *s, *result = NULL;
401 PyObject *s, *result = NULL;
404 char allws, c;
402 char allws, c;
405 const char *r;
403 const char *r;
406 int i, rlen, wlen = 0;
404 int i, rlen, wlen = 0;
407 char *w;
405 char *w;
408
406
409 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
407 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
410 return NULL;
408 return NULL;
411 r = PyBytes_AsString(s);
409 r = PyBytes_AsString(s);
412 rlen = PyBytes_Size(s);
410 rlen = PyBytes_Size(s);
413
411
414 w = (char *)malloc(rlen ? rlen : 1);
412 w = (char *)malloc(rlen ? rlen : 1);
415 if (!w)
413 if (!w)
416 goto nomem;
414 goto nomem;
417
415
418 for (i = 0; i != rlen; i++) {
416 for (i = 0; i != rlen; i++) {
419 c = r[i];
417 c = r[i];
420 if (c == ' ' || c == '\t' || c == '\r') {
418 if (c == ' ' || c == '\t' || c == '\r') {
421 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
419 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
422 w[wlen++] = ' ';
420 w[wlen++] = ' ';
423 } else if (c == '\n' && !allws
421 } else if (c == '\n' && !allws
424 && wlen > 0 && w[wlen - 1] == ' ') {
422 && wlen > 0 && w[wlen - 1] == ' ') {
425 w[wlen - 1] = '\n';
423 w[wlen - 1] = '\n';
426 } else {
424 } else {
427 w[wlen++] = c;
425 w[wlen++] = c;
428 }
426 }
429 }
427 }
430
428
431 result = PyBytes_FromStringAndSize(w, wlen);
429 result = PyBytes_FromStringAndSize(w, wlen);
432
430
433 nomem:
431 nomem:
434 free(w);
432 free(w);
435 return result ? result : PyErr_NoMemory();
433 return result ? result : PyErr_NoMemory();
436 }
434 }
437
435
438
436
439 static char mdiff_doc[] = "Efficient binary diff.";
437 static char mdiff_doc[] = "Efficient binary diff.";
440
438
441 static PyMethodDef methods[] = {
439 static PyMethodDef methods[] = {
442 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
440 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
443 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
441 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
444 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
442 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
445 {NULL, NULL}
443 {NULL, NULL}
446 };
444 };
447
445
448 #ifdef IS_PY3K
446 #ifdef IS_PY3K
449 static struct PyModuleDef bdiff_module = {
447 static struct PyModuleDef bdiff_module = {
450 PyModuleDef_HEAD_INIT,
448 PyModuleDef_HEAD_INIT,
451 "bdiff",
449 "bdiff",
452 mdiff_doc,
450 mdiff_doc,
453 -1,
451 -1,
454 methods
452 methods
455 };
453 };
456
454
457 PyMODINIT_FUNC PyInit_bdiff(void)
455 PyMODINIT_FUNC PyInit_bdiff(void)
458 {
456 {
459 return PyModule_Create(&bdiff_module);
457 return PyModule_Create(&bdiff_module);
460 }
458 }
461 #else
459 #else
462 PyMODINIT_FUNC initbdiff(void)
460 PyMODINIT_FUNC initbdiff(void)
463 {
461 {
464 Py_InitModule3("bdiff", methods, mdiff_doc);
462 Py_InitModule3("bdiff", methods, mdiff_doc);
465 }
463 }
466 #endif
464 #endif
467
465
@@ -1,434 +1,430
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26
27 #include "util.h"
27 #include "util.h"
28
28
29 static char mpatch_doc[] = "Efficient binary patching.";
29 static char mpatch_doc[] = "Efficient binary patching.";
30 static PyObject *mpatch_Error;
30 static PyObject *mpatch_Error;
31
31
32 struct frag {
32 struct frag {
33 int start, end, len;
33 int start, end, len;
34 const char *data;
34 const char *data;
35 };
35 };
36
36
37 struct flist {
37 struct flist {
38 struct frag *base, *head, *tail;
38 struct frag *base, *head, *tail;
39 };
39 };
40
40
41 static struct flist *lalloc(int size)
41 static struct flist *lalloc(int size)
42 {
42 {
43 struct flist *a = NULL;
43 struct flist *a = NULL;
44
44
45 if (size < 1)
45 if (size < 1)
46 size = 1;
46 size = 1;
47
47
48 a = (struct flist *)malloc(sizeof(struct flist));
48 a = (struct flist *)malloc(sizeof(struct flist));
49 if (a) {
49 if (a) {
50 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
50 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
51 if (a->base) {
51 if (a->base) {
52 a->head = a->tail = a->base;
52 a->head = a->tail = a->base;
53 return a;
53 return a;
54 }
54 }
55 free(a);
55 free(a);
56 a = NULL;
56 a = NULL;
57 }
57 }
58 if (!PyErr_Occurred())
58 if (!PyErr_Occurred())
59 PyErr_NoMemory();
59 PyErr_NoMemory();
60 return NULL;
60 return NULL;
61 }
61 }
62
62
63 static void lfree(struct flist *a)
63 static void lfree(struct flist *a)
64 {
64 {
65 if (a) {
65 if (a) {
66 free(a->base);
66 free(a->base);
67 free(a);
67 free(a);
68 }
68 }
69 }
69 }
70
70
71 static int lsize(struct flist *a)
71 static int lsize(struct flist *a)
72 {
72 {
73 return a->tail - a->head;
73 return a->tail - a->head;
74 }
74 }
75
75
76 /* move hunks in source that are less cut to dest, compensating
76 /* move hunks in source that are less cut to dest, compensating
77 for changes in offset. the last hunk may be split if necessary.
77 for changes in offset. the last hunk may be split if necessary.
78 */
78 */
79 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
79 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
80 {
80 {
81 struct frag *d = dest->tail, *s = src->head;
81 struct frag *d = dest->tail, *s = src->head;
82 int postend, c, l;
82 int postend, c, l;
83
83
84 while (s != src->tail) {
84 while (s != src->tail) {
85 if (s->start + offset >= cut)
85 if (s->start + offset >= cut)
86 break; /* we've gone far enough */
86 break; /* we've gone far enough */
87
87
88 postend = offset + s->start + s->len;
88 postend = offset + s->start + s->len;
89 if (postend <= cut) {
89 if (postend <= cut) {
90 /* save this hunk */
90 /* save this hunk */
91 offset += s->start + s->len - s->end;
91 offset += s->start + s->len - s->end;
92 *d++ = *s++;
92 *d++ = *s++;
93 }
93 }
94 else {
94 else {
95 /* break up this hunk */
95 /* break up this hunk */
96 c = cut - offset;
96 c = cut - offset;
97 if (s->end < c)
97 if (s->end < c)
98 c = s->end;
98 c = s->end;
99 l = cut - offset - s->start;
99 l = cut - offset - s->start;
100 if (s->len < l)
100 if (s->len < l)
101 l = s->len;
101 l = s->len;
102
102
103 offset += s->start + l - c;
103 offset += s->start + l - c;
104
104
105 d->start = s->start;
105 d->start = s->start;
106 d->end = c;
106 d->end = c;
107 d->len = l;
107 d->len = l;
108 d->data = s->data;
108 d->data = s->data;
109 d++;
109 d++;
110 s->start = c;
110 s->start = c;
111 s->len = s->len - l;
111 s->len = s->len - l;
112 s->data = s->data + l;
112 s->data = s->data + l;
113
113
114 break;
114 break;
115 }
115 }
116 }
116 }
117
117
118 dest->tail = d;
118 dest->tail = d;
119 src->head = s;
119 src->head = s;
120 return offset;
120 return offset;
121 }
121 }
122
122
123 /* like gather, but with no output list */
123 /* like gather, but with no output list */
124 static int discard(struct flist *src, int cut, int offset)
124 static int discard(struct flist *src, int cut, int offset)
125 {
125 {
126 struct frag *s = src->head;
126 struct frag *s = src->head;
127 int postend, c, l;
127 int postend, c, l;
128
128
129 while (s != src->tail) {
129 while (s != src->tail) {
130 if (s->start + offset >= cut)
130 if (s->start + offset >= cut)
131 break;
131 break;
132
132
133 postend = offset + s->start + s->len;
133 postend = offset + s->start + s->len;
134 if (postend <= cut) {
134 if (postend <= cut) {
135 offset += s->start + s->len - s->end;
135 offset += s->start + s->len - s->end;
136 s++;
136 s++;
137 }
137 }
138 else {
138 else {
139 c = cut - offset;
139 c = cut - offset;
140 if (s->end < c)
140 if (s->end < c)
141 c = s->end;
141 c = s->end;
142 l = cut - offset - s->start;
142 l = cut - offset - s->start;
143 if (s->len < l)
143 if (s->len < l)
144 l = s->len;
144 l = s->len;
145
145
146 offset += s->start + l - c;
146 offset += s->start + l - c;
147 s->start = c;
147 s->start = c;
148 s->len = s->len - l;
148 s->len = s->len - l;
149 s->data = s->data + l;
149 s->data = s->data + l;
150
150
151 break;
151 break;
152 }
152 }
153 }
153 }
154
154
155 src->head = s;
155 src->head = s;
156 return offset;
156 return offset;
157 }
157 }
158
158
159 /* combine hunk lists a and b, while adjusting b for offset changes in a/
159 /* combine hunk lists a and b, while adjusting b for offset changes in a/
160 this deletes a and b and returns the resultant list. */
160 this deletes a and b and returns the resultant list. */
161 static struct flist *combine(struct flist *a, struct flist *b)
161 static struct flist *combine(struct flist *a, struct flist *b)
162 {
162 {
163 struct flist *c = NULL;
163 struct flist *c = NULL;
164 struct frag *bh, *ct;
164 struct frag *bh, *ct;
165 int offset = 0, post;
165 int offset = 0, post;
166
166
167 if (a && b)
167 if (a && b)
168 c = lalloc((lsize(a) + lsize(b)) * 2);
168 c = lalloc((lsize(a) + lsize(b)) * 2);
169
169
170 if (c) {
170 if (c) {
171
171
172 for (bh = b->head; bh != b->tail; bh++) {
172 for (bh = b->head; bh != b->tail; bh++) {
173 /* save old hunks */
173 /* save old hunks */
174 offset = gather(c, a, bh->start, offset);
174 offset = gather(c, a, bh->start, offset);
175
175
176 /* discard replaced hunks */
176 /* discard replaced hunks */
177 post = discard(a, bh->end, offset);
177 post = discard(a, bh->end, offset);
178
178
179 /* insert new hunk */
179 /* insert new hunk */
180 ct = c->tail;
180 ct = c->tail;
181 ct->start = bh->start - offset;
181 ct->start = bh->start - offset;
182 ct->end = bh->end - post;
182 ct->end = bh->end - post;
183 ct->len = bh->len;
183 ct->len = bh->len;
184 ct->data = bh->data;
184 ct->data = bh->data;
185 c->tail++;
185 c->tail++;
186 offset = post;
186 offset = post;
187 }
187 }
188
188
189 /* hold on to tail from a */
189 /* hold on to tail from a */
190 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
190 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
191 c->tail += lsize(a);
191 c->tail += lsize(a);
192 }
192 }
193
193
194 lfree(a);
194 lfree(a);
195 lfree(b);
195 lfree(b);
196 return c;
196 return c;
197 }
197 }
198
198
199 /* decode a binary patch into a hunk list */
199 /* decode a binary patch into a hunk list */
200 static struct flist *decode(const char *bin, int len)
200 static struct flist *decode(const char *bin, int len)
201 {
201 {
202 struct flist *l;
202 struct flist *l;
203 struct frag *lt;
203 struct frag *lt;
204 const char *data = bin + 12, *end = bin + len;
204 const char *data = bin + 12, *end = bin + len;
205 uint32_t decode[3]; /* for dealing with alignment issues */
206
205
207 /* assume worst case size, we won't have many of these lists */
206 /* assume worst case size, we won't have many of these lists */
208 l = lalloc(len / 12);
207 l = lalloc(len / 12);
209 if (!l)
208 if (!l)
210 return NULL;
209 return NULL;
211
210
212 lt = l->tail;
211 lt = l->tail;
213
212
214 while (data <= end) {
213 while (data <= end) {
215 memcpy(decode, bin, 12);
214 lt->start = getbe32(bin);
216 lt->start = ntohl(decode[0]);
215 lt->end = getbe32(bin + 4);
217 lt->end = ntohl(decode[1]);
216 lt->len = getbe32(bin + 8);
218 lt->len = ntohl(decode[2]);
219 if (lt->start > lt->end)
217 if (lt->start > lt->end)
220 break; /* sanity check */
218 break; /* sanity check */
221 bin = data + lt->len;
219 bin = data + lt->len;
222 if (bin < data)
220 if (bin < data)
223 break; /* big data + big (bogus) len can wrap around */
221 break; /* big data + big (bogus) len can wrap around */
224 lt->data = data;
222 lt->data = data;
225 data = bin + 12;
223 data = bin + 12;
226 lt++;
224 lt++;
227 }
225 }
228
226
229 if (bin != end) {
227 if (bin != end) {
230 if (!PyErr_Occurred())
228 if (!PyErr_Occurred())
231 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
229 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
232 lfree(l);
230 lfree(l);
233 return NULL;
231 return NULL;
234 }
232 }
235
233
236 l->tail = lt;
234 l->tail = lt;
237 return l;
235 return l;
238 }
236 }
239
237
240 /* calculate the size of resultant text */
238 /* calculate the size of resultant text */
241 static int calcsize(int len, struct flist *l)
239 static int calcsize(int len, struct flist *l)
242 {
240 {
243 int outlen = 0, last = 0;
241 int outlen = 0, last = 0;
244 struct frag *f = l->head;
242 struct frag *f = l->head;
245
243
246 while (f != l->tail) {
244 while (f != l->tail) {
247 if (f->start < last || f->end > len) {
245 if (f->start < last || f->end > len) {
248 if (!PyErr_Occurred())
246 if (!PyErr_Occurred())
249 PyErr_SetString(mpatch_Error,
247 PyErr_SetString(mpatch_Error,
250 "invalid patch");
248 "invalid patch");
251 return -1;
249 return -1;
252 }
250 }
253 outlen += f->start - last;
251 outlen += f->start - last;
254 last = f->end;
252 last = f->end;
255 outlen += f->len;
253 outlen += f->len;
256 f++;
254 f++;
257 }
255 }
258
256
259 outlen += len - last;
257 outlen += len - last;
260 return outlen;
258 return outlen;
261 }
259 }
262
260
263 static int apply(char *buf, const char *orig, int len, struct flist *l)
261 static int apply(char *buf, const char *orig, int len, struct flist *l)
264 {
262 {
265 struct frag *f = l->head;
263 struct frag *f = l->head;
266 int last = 0;
264 int last = 0;
267 char *p = buf;
265 char *p = buf;
268
266
269 while (f != l->tail) {
267 while (f != l->tail) {
270 if (f->start < last || f->end > len) {
268 if (f->start < last || f->end > len) {
271 if (!PyErr_Occurred())
269 if (!PyErr_Occurred())
272 PyErr_SetString(mpatch_Error,
270 PyErr_SetString(mpatch_Error,
273 "invalid patch");
271 "invalid patch");
274 return 0;
272 return 0;
275 }
273 }
276 memcpy(p, orig + last, f->start - last);
274 memcpy(p, orig + last, f->start - last);
277 p += f->start - last;
275 p += f->start - last;
278 memcpy(p, f->data, f->len);
276 memcpy(p, f->data, f->len);
279 last = f->end;
277 last = f->end;
280 p += f->len;
278 p += f->len;
281 f++;
279 f++;
282 }
280 }
283 memcpy(p, orig + last, len - last);
281 memcpy(p, orig + last, len - last);
284 return 1;
282 return 1;
285 }
283 }
286
284
287 /* recursively generate a patch of all bins between start and end */
285 /* recursively generate a patch of all bins between start and end */
288 static struct flist *fold(PyObject *bins, int start, int end)
286 static struct flist *fold(PyObject *bins, int start, int end)
289 {
287 {
290 int len;
288 int len;
291 Py_ssize_t blen;
289 Py_ssize_t blen;
292 const char *buffer;
290 const char *buffer;
293
291
294 if (start + 1 == end) {
292 if (start + 1 == end) {
295 /* trivial case, output a decoded list */
293 /* trivial case, output a decoded list */
296 PyObject *tmp = PyList_GetItem(bins, start);
294 PyObject *tmp = PyList_GetItem(bins, start);
297 if (!tmp)
295 if (!tmp)
298 return NULL;
296 return NULL;
299 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
297 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
300 return NULL;
298 return NULL;
301 return decode(buffer, blen);
299 return decode(buffer, blen);
302 }
300 }
303
301
304 /* divide and conquer, memory management is elsewhere */
302 /* divide and conquer, memory management is elsewhere */
305 len = (end - start) / 2;
303 len = (end - start) / 2;
306 return combine(fold(bins, start, start + len),
304 return combine(fold(bins, start, start + len),
307 fold(bins, start + len, end));
305 fold(bins, start + len, end));
308 }
306 }
309
307
310 static PyObject *
308 static PyObject *
311 patches(PyObject *self, PyObject *args)
309 patches(PyObject *self, PyObject *args)
312 {
310 {
313 PyObject *text, *bins, *result;
311 PyObject *text, *bins, *result;
314 struct flist *patch;
312 struct flist *patch;
315 const char *in;
313 const char *in;
316 char *out;
314 char *out;
317 int len, outlen;
315 int len, outlen;
318 Py_ssize_t inlen;
316 Py_ssize_t inlen;
319
317
320 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
318 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
321 return NULL;
319 return NULL;
322
320
323 len = PyList_Size(bins);
321 len = PyList_Size(bins);
324 if (!len) {
322 if (!len) {
325 /* nothing to do */
323 /* nothing to do */
326 Py_INCREF(text);
324 Py_INCREF(text);
327 return text;
325 return text;
328 }
326 }
329
327
330 if (PyObject_AsCharBuffer(text, &in, &inlen))
328 if (PyObject_AsCharBuffer(text, &in, &inlen))
331 return NULL;
329 return NULL;
332
330
333 patch = fold(bins, 0, len);
331 patch = fold(bins, 0, len);
334 if (!patch)
332 if (!patch)
335 return NULL;
333 return NULL;
336
334
337 outlen = calcsize(inlen, patch);
335 outlen = calcsize(inlen, patch);
338 if (outlen < 0) {
336 if (outlen < 0) {
339 result = NULL;
337 result = NULL;
340 goto cleanup;
338 goto cleanup;
341 }
339 }
342 result = PyBytes_FromStringAndSize(NULL, outlen);
340 result = PyBytes_FromStringAndSize(NULL, outlen);
343 if (!result) {
341 if (!result) {
344 result = NULL;
342 result = NULL;
345 goto cleanup;
343 goto cleanup;
346 }
344 }
347 out = PyBytes_AsString(result);
345 out = PyBytes_AsString(result);
348 if (!apply(out, in, inlen, patch)) {
346 if (!apply(out, in, inlen, patch)) {
349 Py_DECREF(result);
347 Py_DECREF(result);
350 result = NULL;
348 result = NULL;
351 }
349 }
352 cleanup:
350 cleanup:
353 lfree(patch);
351 lfree(patch);
354 return result;
352 return result;
355 }
353 }
356
354
357 /* calculate size of a patched file directly */
355 /* calculate size of a patched file directly */
358 static PyObject *
356 static PyObject *
359 patchedsize(PyObject *self, PyObject *args)
357 patchedsize(PyObject *self, PyObject *args)
360 {
358 {
361 long orig, start, end, len, outlen = 0, last = 0;
359 long orig, start, end, len, outlen = 0, last = 0;
362 int patchlen;
360 int patchlen;
363 char *bin, *binend, *data;
361 char *bin, *binend, *data;
364 uint32_t decode[3]; /* for dealing with alignment issues */
365
362
366 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
363 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
367 return NULL;
364 return NULL;
368
365
369 binend = bin + patchlen;
366 binend = bin + patchlen;
370 data = bin + 12;
367 data = bin + 12;
371
368
372 while (data <= binend) {
369 while (data <= binend) {
373 memcpy(decode, bin, 12);
370 start = getbe32(bin);
374 start = ntohl(decode[0]);
371 end = getbe32(bin + 4);
375 end = ntohl(decode[1]);
372 len = getbe32(bin + 8);
376 len = ntohl(decode[2]);
377 if (start > end)
373 if (start > end)
378 break; /* sanity check */
374 break; /* sanity check */
379 bin = data + len;
375 bin = data + len;
380 if (bin < data)
376 if (bin < data)
381 break; /* big data + big (bogus) len can wrap around */
377 break; /* big data + big (bogus) len can wrap around */
382 data = bin + 12;
378 data = bin + 12;
383 outlen += start - last;
379 outlen += start - last;
384 last = end;
380 last = end;
385 outlen += len;
381 outlen += len;
386 }
382 }
387
383
388 if (bin != binend) {
384 if (bin != binend) {
389 if (!PyErr_Occurred())
385 if (!PyErr_Occurred())
390 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
386 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
391 return NULL;
387 return NULL;
392 }
388 }
393
389
394 outlen += orig - last;
390 outlen += orig - last;
395 return Py_BuildValue("l", outlen);
391 return Py_BuildValue("l", outlen);
396 }
392 }
397
393
398 static PyMethodDef methods[] = {
394 static PyMethodDef methods[] = {
399 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
395 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
400 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
396 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
401 {NULL, NULL}
397 {NULL, NULL}
402 };
398 };
403
399
404 #ifdef IS_PY3K
400 #ifdef IS_PY3K
405 static struct PyModuleDef mpatch_module = {
401 static struct PyModuleDef mpatch_module = {
406 PyModuleDef_HEAD_INIT,
402 PyModuleDef_HEAD_INIT,
407 "mpatch",
403 "mpatch",
408 mpatch_doc,
404 mpatch_doc,
409 -1,
405 -1,
410 methods
406 methods
411 };
407 };
412
408
413 PyMODINIT_FUNC PyInit_mpatch(void)
409 PyMODINIT_FUNC PyInit_mpatch(void)
414 {
410 {
415 PyObject *m;
411 PyObject *m;
416
412
417 m = PyModule_Create(&mpatch_module);
413 m = PyModule_Create(&mpatch_module);
418 if (m == NULL)
414 if (m == NULL)
419 return NULL;
415 return NULL;
420
416
421 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
417 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
422 Py_INCREF(mpatch_Error);
418 Py_INCREF(mpatch_Error);
423 PyModule_AddObject(m, "mpatchError", mpatch_Error);
419 PyModule_AddObject(m, "mpatchError", mpatch_Error);
424
420
425 return m;
421 return m;
426 }
422 }
427 #else
423 #else
428 PyMODINIT_FUNC
424 PyMODINIT_FUNC
429 initmpatch(void)
425 initmpatch(void)
430 {
426 {
431 Py_InitModule3("mpatch", methods, mpatch_doc);
427 Py_InitModule3("mpatch", methods, mpatch_doc);
432 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
428 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
433 }
429 }
434 #endif
430 #endif
@@ -1,1200 +1,1195
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <string.h>
12 #include <string.h>
13
13
14 #include "util.h"
14 #include "util.h"
15
15
16 static int hexdigit(char c)
16 static int hexdigit(char c)
17 {
17 {
18 if (c >= '0' && c <= '9')
18 if (c >= '0' && c <= '9')
19 return c - '0';
19 return c - '0';
20 if (c >= 'a' && c <= 'f')
20 if (c >= 'a' && c <= 'f')
21 return c - 'a' + 10;
21 return c - 'a' + 10;
22 if (c >= 'A' && c <= 'F')
22 if (c >= 'A' && c <= 'F')
23 return c - 'A' + 10;
23 return c - 'A' + 10;
24
24
25 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
25 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
26 return 0;
26 return 0;
27 }
27 }
28
28
29 /*
29 /*
30 * Turn a hex-encoded string into binary.
30 * Turn a hex-encoded string into binary.
31 */
31 */
32 static PyObject *unhexlify(const char *str, int len)
32 static PyObject *unhexlify(const char *str, int len)
33 {
33 {
34 PyObject *ret;
34 PyObject *ret;
35 const char *c;
35 const char *c;
36 char *d;
36 char *d;
37
37
38 ret = PyBytes_FromStringAndSize(NULL, len / 2);
38 ret = PyBytes_FromStringAndSize(NULL, len / 2);
39
39
40 if (!ret)
40 if (!ret)
41 return NULL;
41 return NULL;
42
42
43 d = PyBytes_AsString(ret);
43 d = PyBytes_AsString(ret);
44
44
45 for (c = str; c < str + len;) {
45 for (c = str; c < str + len;) {
46 int hi = hexdigit(*c++);
46 int hi = hexdigit(*c++);
47 int lo = hexdigit(*c++);
47 int lo = hexdigit(*c++);
48 *d++ = (hi << 4) | lo;
48 *d++ = (hi << 4) | lo;
49 }
49 }
50
50
51 return ret;
51 return ret;
52 }
52 }
53
53
54 /*
54 /*
55 * This code assumes that a manifest is stitched together with newline
55 * This code assumes that a manifest is stitched together with newline
56 * ('\n') characters.
56 * ('\n') characters.
57 */
57 */
58 static PyObject *parse_manifest(PyObject *self, PyObject *args)
58 static PyObject *parse_manifest(PyObject *self, PyObject *args)
59 {
59 {
60 PyObject *mfdict, *fdict;
60 PyObject *mfdict, *fdict;
61 char *str, *cur, *start, *zero;
61 char *str, *cur, *start, *zero;
62 int len;
62 int len;
63
63
64 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
64 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
65 &PyDict_Type, &mfdict,
65 &PyDict_Type, &mfdict,
66 &PyDict_Type, &fdict,
66 &PyDict_Type, &fdict,
67 &str, &len))
67 &str, &len))
68 goto quit;
68 goto quit;
69
69
70 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
70 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
71 PyObject *file = NULL, *node = NULL;
71 PyObject *file = NULL, *node = NULL;
72 PyObject *flags = NULL;
72 PyObject *flags = NULL;
73 int nlen;
73 int nlen;
74
74
75 if (!*cur) {
75 if (!*cur) {
76 zero = cur;
76 zero = cur;
77 continue;
77 continue;
78 }
78 }
79 else if (*cur != '\n')
79 else if (*cur != '\n')
80 continue;
80 continue;
81
81
82 if (!zero) {
82 if (!zero) {
83 PyErr_SetString(PyExc_ValueError,
83 PyErr_SetString(PyExc_ValueError,
84 "manifest entry has no separator");
84 "manifest entry has no separator");
85 goto quit;
85 goto quit;
86 }
86 }
87
87
88 file = PyBytes_FromStringAndSize(start, zero - start);
88 file = PyBytes_FromStringAndSize(start, zero - start);
89
89
90 if (!file)
90 if (!file)
91 goto bail;
91 goto bail;
92
92
93 nlen = cur - zero - 1;
93 nlen = cur - zero - 1;
94
94
95 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
95 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
96 if (!node)
96 if (!node)
97 goto bail;
97 goto bail;
98
98
99 if (nlen > 40) {
99 if (nlen > 40) {
100 flags = PyBytes_FromStringAndSize(zero + 41,
100 flags = PyBytes_FromStringAndSize(zero + 41,
101 nlen - 40);
101 nlen - 40);
102 if (!flags)
102 if (!flags)
103 goto bail;
103 goto bail;
104
104
105 if (PyDict_SetItem(fdict, file, flags) == -1)
105 if (PyDict_SetItem(fdict, file, flags) == -1)
106 goto bail;
106 goto bail;
107 }
107 }
108
108
109 if (PyDict_SetItem(mfdict, file, node) == -1)
109 if (PyDict_SetItem(mfdict, file, node) == -1)
110 goto bail;
110 goto bail;
111
111
112 start = cur + 1;
112 start = cur + 1;
113 zero = NULL;
113 zero = NULL;
114
114
115 Py_XDECREF(flags);
115 Py_XDECREF(flags);
116 Py_XDECREF(node);
116 Py_XDECREF(node);
117 Py_XDECREF(file);
117 Py_XDECREF(file);
118 continue;
118 continue;
119 bail:
119 bail:
120 Py_XDECREF(flags);
120 Py_XDECREF(flags);
121 Py_XDECREF(node);
121 Py_XDECREF(node);
122 Py_XDECREF(file);
122 Py_XDECREF(file);
123 goto quit;
123 goto quit;
124 }
124 }
125
125
126 if (len > 0 && *(cur - 1) != '\n') {
126 if (len > 0 && *(cur - 1) != '\n') {
127 PyErr_SetString(PyExc_ValueError,
127 PyErr_SetString(PyExc_ValueError,
128 "manifest contains trailing garbage");
128 "manifest contains trailing garbage");
129 goto quit;
129 goto quit;
130 }
130 }
131
131
132 Py_INCREF(Py_None);
132 Py_INCREF(Py_None);
133 return Py_None;
133 return Py_None;
134 quit:
134 quit:
135 return NULL;
135 return NULL;
136 }
136 }
137
137
138 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
138 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
139 {
139 {
140 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
140 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
141 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
141 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
142 char *str, *cur, *end, *cpos;
142 char *str, *cur, *end, *cpos;
143 int state, mode, size, mtime;
143 int state, mode, size, mtime;
144 unsigned int flen;
144 unsigned int flen;
145 int len;
145 int len;
146 uint32_t decode[4]; /* for alignment */
147
146
148 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
147 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
149 &PyDict_Type, &dmap,
148 &PyDict_Type, &dmap,
150 &PyDict_Type, &cmap,
149 &PyDict_Type, &cmap,
151 &str, &len))
150 &str, &len))
152 goto quit;
151 goto quit;
153
152
154 /* read parents */
153 /* read parents */
155 if (len < 40)
154 if (len < 40)
156 goto quit;
155 goto quit;
157
156
158 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
157 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
159 if (!parents)
158 if (!parents)
160 goto quit;
159 goto quit;
161
160
162 /* read filenames */
161 /* read filenames */
163 cur = str + 40;
162 cur = str + 40;
164 end = str + len;
163 end = str + len;
165
164
166 while (cur < end - 17) {
165 while (cur < end - 17) {
167 /* unpack header */
166 /* unpack header */
168 state = *cur;
167 state = *cur;
169 memcpy(decode, cur + 1, 16);
168 mode = getbe32(cur + 1);
170 mode = ntohl(decode[0]);
169 size = getbe32(cur + 5);
171 size = ntohl(decode[1]);
170 mtime = getbe32(cur + 9);
172 mtime = ntohl(decode[2]);
171 flen = getbe32(cur + 13);
173 flen = ntohl(decode[3]);
174 cur += 17;
172 cur += 17;
175 if (cur + flen > end || cur + flen < cur) {
173 if (cur + flen > end || cur + flen < cur) {
176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
174 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 goto quit;
175 goto quit;
178 }
176 }
179
177
180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
178 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 if (!entry)
179 if (!entry)
182 goto quit;
180 goto quit;
183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
181 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184
182
185 cpos = memchr(cur, 0, flen);
183 cpos = memchr(cur, 0, flen);
186 if (cpos) {
184 if (cpos) {
187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
185 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 cname = PyBytes_FromStringAndSize(cpos + 1,
186 cname = PyBytes_FromStringAndSize(cpos + 1,
189 flen - (cpos - cur) - 1);
187 flen - (cpos - cur) - 1);
190 if (!fname || !cname ||
188 if (!fname || !cname ||
191 PyDict_SetItem(cmap, fname, cname) == -1 ||
189 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 PyDict_SetItem(dmap, fname, entry) == -1)
190 PyDict_SetItem(dmap, fname, entry) == -1)
193 goto quit;
191 goto quit;
194 Py_DECREF(cname);
192 Py_DECREF(cname);
195 } else {
193 } else {
196 fname = PyBytes_FromStringAndSize(cur, flen);
194 fname = PyBytes_FromStringAndSize(cur, flen);
197 if (!fname ||
195 if (!fname ||
198 PyDict_SetItem(dmap, fname, entry) == -1)
196 PyDict_SetItem(dmap, fname, entry) == -1)
199 goto quit;
197 goto quit;
200 }
198 }
201 cur += flen;
199 cur += flen;
202 Py_DECREF(fname);
200 Py_DECREF(fname);
203 Py_DECREF(entry);
201 Py_DECREF(entry);
204 fname = cname = entry = NULL;
202 fname = cname = entry = NULL;
205 }
203 }
206
204
207 ret = parents;
205 ret = parents;
208 Py_INCREF(ret);
206 Py_INCREF(ret);
209 quit:
207 quit:
210 Py_XDECREF(fname);
208 Py_XDECREF(fname);
211 Py_XDECREF(cname);
209 Py_XDECREF(cname);
212 Py_XDECREF(entry);
210 Py_XDECREF(entry);
213 Py_XDECREF(parents);
211 Py_XDECREF(parents);
214 return ret;
212 return ret;
215 }
213 }
216
214
217 /*
215 /*
218 * A base-16 trie for fast node->rev mapping.
216 * A base-16 trie for fast node->rev mapping.
219 *
217 *
220 * Positive value is index of the next node in the trie
218 * Positive value is index of the next node in the trie
221 * Negative value is a leaf: -(rev + 1)
219 * Negative value is a leaf: -(rev + 1)
222 * Zero is empty
220 * Zero is empty
223 */
221 */
224 typedef struct {
222 typedef struct {
225 int children[16];
223 int children[16];
226 } nodetree;
224 } nodetree;
227
225
228 /*
226 /*
229 * This class has two behaviours.
227 * This class has two behaviours.
230 *
228 *
231 * When used in a list-like way (with integer keys), we decode an
229 * When used in a list-like way (with integer keys), we decode an
232 * entry in a RevlogNG index file on demand. Our last entry is a
230 * entry in a RevlogNG index file on demand. Our last entry is a
233 * sentinel, always a nullid. We have limited support for
231 * sentinel, always a nullid. We have limited support for
234 * integer-keyed insert and delete, only at elements right before the
232 * integer-keyed insert and delete, only at elements right before the
235 * sentinel.
233 * sentinel.
236 *
234 *
237 * With string keys, we lazily perform a reverse mapping from node to
235 * With string keys, we lazily perform a reverse mapping from node to
238 * rev, using a base-16 trie.
236 * rev, using a base-16 trie.
239 */
237 */
240 typedef struct {
238 typedef struct {
241 PyObject_HEAD
239 PyObject_HEAD
242 /* Type-specific fields go here. */
240 /* Type-specific fields go here. */
243 PyObject *data; /* raw bytes of index */
241 PyObject *data; /* raw bytes of index */
244 PyObject **cache; /* cached tuples */
242 PyObject **cache; /* cached tuples */
245 const char **offsets; /* populated on demand */
243 const char **offsets; /* populated on demand */
246 Py_ssize_t raw_length; /* original number of elements */
244 Py_ssize_t raw_length; /* original number of elements */
247 Py_ssize_t length; /* current number of elements */
245 Py_ssize_t length; /* current number of elements */
248 PyObject *added; /* populated on demand */
246 PyObject *added; /* populated on demand */
249 nodetree *nt; /* base-16 trie */
247 nodetree *nt; /* base-16 trie */
250 int ntlength; /* # nodes in use */
248 int ntlength; /* # nodes in use */
251 int ntcapacity; /* # nodes allocated */
249 int ntcapacity; /* # nodes allocated */
252 int ntdepth; /* maximum depth of tree */
250 int ntdepth; /* maximum depth of tree */
253 int ntsplits; /* # splits performed */
251 int ntsplits; /* # splits performed */
254 int ntrev; /* last rev scanned */
252 int ntrev; /* last rev scanned */
255 int ntlookups; /* # lookups */
253 int ntlookups; /* # lookups */
256 int ntmisses; /* # lookups that miss the cache */
254 int ntmisses; /* # lookups that miss the cache */
257 int inlined;
255 int inlined;
258 } indexObject;
256 } indexObject;
259
257
260 static Py_ssize_t index_length(const indexObject *self)
258 static Py_ssize_t index_length(const indexObject *self)
261 {
259 {
262 if (self->added == NULL)
260 if (self->added == NULL)
263 return self->length;
261 return self->length;
264 return self->length + PyList_GET_SIZE(self->added);
262 return self->length + PyList_GET_SIZE(self->added);
265 }
263 }
266
264
267 static PyObject *nullentry;
265 static PyObject *nullentry;
268 static const char nullid[20];
266 static const char nullid[20];
269
267
270 static long inline_scan(indexObject *self, const char **offsets);
268 static long inline_scan(indexObject *self, const char **offsets);
271
269
272 #if LONG_MAX == 0x7fffffffL
270 #if LONG_MAX == 0x7fffffffL
273 static char *tuple_format = "Kiiiiiis#";
271 static char *tuple_format = "Kiiiiiis#";
274 #else
272 #else
275 static char *tuple_format = "kiiiiiis#";
273 static char *tuple_format = "kiiiiiis#";
276 #endif
274 #endif
277
275
278 /*
276 /*
279 * Return a pointer to the beginning of a RevlogNG record.
277 * Return a pointer to the beginning of a RevlogNG record.
280 */
278 */
281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
279 static const char *index_deref(indexObject *self, Py_ssize_t pos)
282 {
280 {
283 if (self->inlined && pos > 0) {
281 if (self->inlined && pos > 0) {
284 if (self->offsets == NULL) {
282 if (self->offsets == NULL) {
285 self->offsets = malloc(self->raw_length *
283 self->offsets = malloc(self->raw_length *
286 sizeof(*self->offsets));
284 sizeof(*self->offsets));
287 if (self->offsets == NULL)
285 if (self->offsets == NULL)
288 return (const char *)PyErr_NoMemory();
286 return (const char *)PyErr_NoMemory();
289 inline_scan(self, self->offsets);
287 inline_scan(self, self->offsets);
290 }
288 }
291 return self->offsets[pos];
289 return self->offsets[pos];
292 }
290 }
293
291
294 return PyString_AS_STRING(self->data) + pos * 64;
292 return PyString_AS_STRING(self->data) + pos * 64;
295 }
293 }
296
294
297 /*
295 /*
298 * RevlogNG format (all in big endian, data may be inlined):
296 * RevlogNG format (all in big endian, data may be inlined):
299 * 6 bytes: offset
297 * 6 bytes: offset
300 * 2 bytes: flags
298 * 2 bytes: flags
301 * 4 bytes: compressed length
299 * 4 bytes: compressed length
302 * 4 bytes: uncompressed length
300 * 4 bytes: uncompressed length
303 * 4 bytes: base revision
301 * 4 bytes: base revision
304 * 4 bytes: link revision
302 * 4 bytes: link revision
305 * 4 bytes: parent 1 revision
303 * 4 bytes: parent 1 revision
306 * 4 bytes: parent 2 revision
304 * 4 bytes: parent 2 revision
307 * 32 bytes: nodeid (only 20 bytes used)
305 * 32 bytes: nodeid (only 20 bytes used)
308 */
306 */
309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
307 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 {
308 {
311 uint32_t decode[8]; /* to enforce alignment with inline data */
312 uint64_t offset_flags;
309 uint64_t offset_flags;
313 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
310 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
314 const char *c_node_id;
311 const char *c_node_id;
315 const char *data;
312 const char *data;
316 Py_ssize_t length = index_length(self);
313 Py_ssize_t length = index_length(self);
317 PyObject *entry;
314 PyObject *entry;
318
315
319 if (pos < 0)
316 if (pos < 0)
320 pos += length;
317 pos += length;
321
318
322 if (pos < 0 || pos >= length) {
319 if (pos < 0 || pos >= length) {
323 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
320 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
324 return NULL;
321 return NULL;
325 }
322 }
326
323
327 if (pos == length - 1) {
324 if (pos == length - 1) {
328 Py_INCREF(nullentry);
325 Py_INCREF(nullentry);
329 return nullentry;
326 return nullentry;
330 }
327 }
331
328
332 if (pos >= self->length - 1) {
329 if (pos >= self->length - 1) {
333 PyObject *obj;
330 PyObject *obj;
334 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
331 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
335 Py_INCREF(obj);
332 Py_INCREF(obj);
336 return obj;
333 return obj;
337 }
334 }
338
335
339 if (self->cache) {
336 if (self->cache) {
340 if (self->cache[pos]) {
337 if (self->cache[pos]) {
341 Py_INCREF(self->cache[pos]);
338 Py_INCREF(self->cache[pos]);
342 return self->cache[pos];
339 return self->cache[pos];
343 }
340 }
344 } else {
341 } else {
345 self->cache = calloc(self->raw_length, sizeof(PyObject *));
342 self->cache = calloc(self->raw_length, sizeof(PyObject *));
346 if (self->cache == NULL)
343 if (self->cache == NULL)
347 return PyErr_NoMemory();
344 return PyErr_NoMemory();
348 }
345 }
349
346
350 data = index_deref(self, pos);
347 data = index_deref(self, pos);
351 if (data == NULL)
348 if (data == NULL)
352 return NULL;
349 return NULL;
353
350
354 memcpy(decode, data, 8 * sizeof(uint32_t));
351 offset_flags = getbe32(data + 4);
355
356 offset_flags = ntohl(decode[1]);
357 if (pos == 0) /* mask out version number for the first entry */
352 if (pos == 0) /* mask out version number for the first entry */
358 offset_flags &= 0xFFFF;
353 offset_flags &= 0xFFFF;
359 else {
354 else {
360 uint32_t offset_high = ntohl(decode[0]);
355 uint32_t offset_high = getbe32(data);
361 offset_flags |= ((uint64_t)offset_high) << 32;
356 offset_flags |= ((uint64_t)offset_high) << 32;
362 }
357 }
363
358
364 comp_len = ntohl(decode[2]);
359 comp_len = getbe32(data + 8);
365 uncomp_len = ntohl(decode[3]);
360 uncomp_len = getbe32(data + 12);
366 base_rev = ntohl(decode[4]);
361 base_rev = getbe32(data + 16);
367 link_rev = ntohl(decode[5]);
362 link_rev = getbe32(data + 20);
368 parent_1 = ntohl(decode[6]);
363 parent_1 = getbe32(data + 24);
369 parent_2 = ntohl(decode[7]);
364 parent_2 = getbe32(data + 28);
370 c_node_id = data + 32;
365 c_node_id = data + 32;
371
366
372 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
367 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
373 uncomp_len, base_rev, link_rev,
368 uncomp_len, base_rev, link_rev,
374 parent_1, parent_2, c_node_id, 20);
369 parent_1, parent_2, c_node_id, 20);
375
370
376 if (entry)
371 if (entry)
377 PyObject_GC_UnTrack(entry);
372 PyObject_GC_UnTrack(entry);
378
373
379 self->cache[pos] = entry;
374 self->cache[pos] = entry;
380 Py_INCREF(entry);
375 Py_INCREF(entry);
381
376
382 return entry;
377 return entry;
383 }
378 }
384
379
385 /*
380 /*
386 * Return the 20-byte SHA of the node corresponding to the given rev.
381 * Return the 20-byte SHA of the node corresponding to the given rev.
387 */
382 */
388 static const char *index_node(indexObject *self, Py_ssize_t pos)
383 static const char *index_node(indexObject *self, Py_ssize_t pos)
389 {
384 {
390 Py_ssize_t length = index_length(self);
385 Py_ssize_t length = index_length(self);
391 const char *data;
386 const char *data;
392
387
393 if (pos == length - 1)
388 if (pos == length - 1)
394 return nullid;
389 return nullid;
395
390
396 if (pos >= length)
391 if (pos >= length)
397 return NULL;
392 return NULL;
398
393
399 if (pos >= self->length - 1) {
394 if (pos >= self->length - 1) {
400 PyObject *tuple, *str;
395 PyObject *tuple, *str;
401 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
396 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
402 str = PyTuple_GetItem(tuple, 7);
397 str = PyTuple_GetItem(tuple, 7);
403 return str ? PyString_AS_STRING(str) : NULL;
398 return str ? PyString_AS_STRING(str) : NULL;
404 }
399 }
405
400
406 data = index_deref(self, pos);
401 data = index_deref(self, pos);
407 return data ? data + 32 : NULL;
402 return data ? data + 32 : NULL;
408 }
403 }
409
404
410 static int nt_insert(indexObject *self, const char *node, int rev);
405 static int nt_insert(indexObject *self, const char *node, int rev);
411
406
412 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
407 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
413 {
408 {
414 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
409 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
415 return -1;
410 return -1;
416 if (*nodelen == 20)
411 if (*nodelen == 20)
417 return 0;
412 return 0;
418 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
413 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
419 return -1;
414 return -1;
420 }
415 }
421
416
422 static PyObject *index_insert(indexObject *self, PyObject *args)
417 static PyObject *index_insert(indexObject *self, PyObject *args)
423 {
418 {
424 PyObject *obj;
419 PyObject *obj;
425 char *node;
420 char *node;
426 long offset;
421 long offset;
427 Py_ssize_t len, nodelen;
422 Py_ssize_t len, nodelen;
428
423
429 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
424 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
430 return NULL;
425 return NULL;
431
426
432 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
427 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
433 PyErr_SetString(PyExc_TypeError, "8-tuple required");
428 PyErr_SetString(PyExc_TypeError, "8-tuple required");
434 return NULL;
429 return NULL;
435 }
430 }
436
431
437 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
432 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
438 return NULL;
433 return NULL;
439
434
440 len = index_length(self);
435 len = index_length(self);
441
436
442 if (offset < 0)
437 if (offset < 0)
443 offset += len;
438 offset += len;
444
439
445 if (offset != len - 1) {
440 if (offset != len - 1) {
446 PyErr_SetString(PyExc_IndexError,
441 PyErr_SetString(PyExc_IndexError,
447 "insert only supported at index -1");
442 "insert only supported at index -1");
448 return NULL;
443 return NULL;
449 }
444 }
450
445
451 if (offset > INT_MAX) {
446 if (offset > INT_MAX) {
452 PyErr_SetString(PyExc_ValueError,
447 PyErr_SetString(PyExc_ValueError,
453 "currently only 2**31 revs supported");
448 "currently only 2**31 revs supported");
454 return NULL;
449 return NULL;
455 }
450 }
456
451
457 if (self->added == NULL) {
452 if (self->added == NULL) {
458 self->added = PyList_New(0);
453 self->added = PyList_New(0);
459 if (self->added == NULL)
454 if (self->added == NULL)
460 return NULL;
455 return NULL;
461 }
456 }
462
457
463 if (PyList_Append(self->added, obj) == -1)
458 if (PyList_Append(self->added, obj) == -1)
464 return NULL;
459 return NULL;
465
460
466 if (self->nt)
461 if (self->nt)
467 nt_insert(self, node, (int)offset);
462 nt_insert(self, node, (int)offset);
468
463
469 Py_RETURN_NONE;
464 Py_RETURN_NONE;
470 }
465 }
471
466
472 static void _index_clearcaches(indexObject *self)
467 static void _index_clearcaches(indexObject *self)
473 {
468 {
474 if (self->cache) {
469 if (self->cache) {
475 Py_ssize_t i;
470 Py_ssize_t i;
476
471
477 for (i = 0; i < self->raw_length; i++) {
472 for (i = 0; i < self->raw_length; i++) {
478 Py_XDECREF(self->cache[i]);
473 Py_XDECREF(self->cache[i]);
479 self->cache[i] = NULL;
474 self->cache[i] = NULL;
480 }
475 }
481 free(self->cache);
476 free(self->cache);
482 self->cache = NULL;
477 self->cache = NULL;
483 }
478 }
484 if (self->offsets) {
479 if (self->offsets) {
485 free(self->offsets);
480 free(self->offsets);
486 self->offsets = NULL;
481 self->offsets = NULL;
487 }
482 }
488 if (self->nt) {
483 if (self->nt) {
489 free(self->nt);
484 free(self->nt);
490 self->nt = NULL;
485 self->nt = NULL;
491 }
486 }
492 }
487 }
493
488
494 static PyObject *index_clearcaches(indexObject *self)
489 static PyObject *index_clearcaches(indexObject *self)
495 {
490 {
496 _index_clearcaches(self);
491 _index_clearcaches(self);
497 self->ntlength = self->ntcapacity = 0;
492 self->ntlength = self->ntcapacity = 0;
498 self->ntdepth = self->ntsplits = 0;
493 self->ntdepth = self->ntsplits = 0;
499 self->ntrev = -1;
494 self->ntrev = -1;
500 self->ntlookups = self->ntmisses = 0;
495 self->ntlookups = self->ntmisses = 0;
501 Py_RETURN_NONE;
496 Py_RETURN_NONE;
502 }
497 }
503
498
504 static PyObject *index_stats(indexObject *self)
499 static PyObject *index_stats(indexObject *self)
505 {
500 {
506 PyObject *obj = PyDict_New();
501 PyObject *obj = PyDict_New();
507
502
508 if (obj == NULL)
503 if (obj == NULL)
509 return NULL;
504 return NULL;
510
505
511 #define istat(__n, __d) \
506 #define istat(__n, __d) \
512 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
507 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
513 goto bail;
508 goto bail;
514
509
515 if (self->added) {
510 if (self->added) {
516 Py_ssize_t len = PyList_GET_SIZE(self->added);
511 Py_ssize_t len = PyList_GET_SIZE(self->added);
517 if (PyDict_SetItemString(obj, "index entries added",
512 if (PyDict_SetItemString(obj, "index entries added",
518 PyInt_FromLong(len)) == -1)
513 PyInt_FromLong(len)) == -1)
519 goto bail;
514 goto bail;
520 }
515 }
521
516
522 if (self->raw_length != self->length - 1)
517 if (self->raw_length != self->length - 1)
523 istat(raw_length, "revs on disk");
518 istat(raw_length, "revs on disk");
524 istat(length, "revs in memory");
519 istat(length, "revs in memory");
525 istat(ntcapacity, "node trie capacity");
520 istat(ntcapacity, "node trie capacity");
526 istat(ntdepth, "node trie depth");
521 istat(ntdepth, "node trie depth");
527 istat(ntlength, "node trie count");
522 istat(ntlength, "node trie count");
528 istat(ntlookups, "node trie lookups");
523 istat(ntlookups, "node trie lookups");
529 istat(ntmisses, "node trie misses");
524 istat(ntmisses, "node trie misses");
530 istat(ntrev, "node trie last rev scanned");
525 istat(ntrev, "node trie last rev scanned");
531 istat(ntsplits, "node trie splits");
526 istat(ntsplits, "node trie splits");
532
527
533 #undef istat
528 #undef istat
534
529
535 return obj;
530 return obj;
536
531
537 bail:
532 bail:
538 Py_XDECREF(obj);
533 Py_XDECREF(obj);
539 return NULL;
534 return NULL;
540 }
535 }
541
536
542 static inline int nt_level(const char *node, int level)
537 static inline int nt_level(const char *node, int level)
543 {
538 {
544 int v = node[level>>1];
539 int v = node[level>>1];
545 if (!(level & 1))
540 if (!(level & 1))
546 v >>= 4;
541 v >>= 4;
547 return v & 0xf;
542 return v & 0xf;
548 }
543 }
549
544
550 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
545 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
551 {
546 {
552 int level, off;
547 int level, off;
553
548
554 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
549 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
555 return -1;
550 return -1;
556
551
557 if (self->nt == NULL)
552 if (self->nt == NULL)
558 return -2;
553 return -2;
559
554
560 for (level = off = 0; level < nodelen; level++) {
555 for (level = off = 0; level < nodelen; level++) {
561 int k = nt_level(node, level);
556 int k = nt_level(node, level);
562 nodetree *n = &self->nt[off];
557 nodetree *n = &self->nt[off];
563 int v = n->children[k];
558 int v = n->children[k];
564
559
565 if (v < 0) {
560 if (v < 0) {
566 const char *n;
561 const char *n;
567 v = -v - 1;
562 v = -v - 1;
568 n = index_node(self, v);
563 n = index_node(self, v);
569 if (n == NULL)
564 if (n == NULL)
570 return -2;
565 return -2;
571 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
566 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
572 ? -2 : v;
567 ? -2 : v;
573 }
568 }
574 if (v == 0)
569 if (v == 0)
575 return -2;
570 return -2;
576 off = v;
571 off = v;
577 }
572 }
578 return -2;
573 return -2;
579 }
574 }
580
575
581 static int nt_new(indexObject *self)
576 static int nt_new(indexObject *self)
582 {
577 {
583 if (self->ntlength == self->ntcapacity) {
578 if (self->ntlength == self->ntcapacity) {
584 self->ntcapacity *= 2;
579 self->ntcapacity *= 2;
585 self->nt = realloc(self->nt,
580 self->nt = realloc(self->nt,
586 self->ntcapacity * sizeof(nodetree));
581 self->ntcapacity * sizeof(nodetree));
587 if (self->nt == NULL) {
582 if (self->nt == NULL) {
588 PyErr_SetString(PyExc_MemoryError, "out of memory");
583 PyErr_SetString(PyExc_MemoryError, "out of memory");
589 return -1;
584 return -1;
590 }
585 }
591 memset(&self->nt[self->ntlength], 0,
586 memset(&self->nt[self->ntlength], 0,
592 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
587 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
593 }
588 }
594 return self->ntlength++;
589 return self->ntlength++;
595 }
590 }
596
591
597 static int nt_insert(indexObject *self, const char *node, int rev)
592 static int nt_insert(indexObject *self, const char *node, int rev)
598 {
593 {
599 int level = 0;
594 int level = 0;
600 int off = 0;
595 int off = 0;
601
596
602 while (level < 20) {
597 while (level < 20) {
603 int k = nt_level(node, level);
598 int k = nt_level(node, level);
604 nodetree *n;
599 nodetree *n;
605 int v;
600 int v;
606
601
607 n = &self->nt[off];
602 n = &self->nt[off];
608 v = n->children[k];
603 v = n->children[k];
609
604
610 if (v == 0) {
605 if (v == 0) {
611 n->children[k] = -rev - 1;
606 n->children[k] = -rev - 1;
612 return 0;
607 return 0;
613 }
608 }
614 if (v < 0) {
609 if (v < 0) {
615 const char *oldnode = index_node(self, -v - 1);
610 const char *oldnode = index_node(self, -v - 1);
616 int noff;
611 int noff;
617
612
618 if (!oldnode || !memcmp(oldnode, node, 20)) {
613 if (!oldnode || !memcmp(oldnode, node, 20)) {
619 n->children[k] = -rev - 1;
614 n->children[k] = -rev - 1;
620 return 0;
615 return 0;
621 }
616 }
622 noff = nt_new(self);
617 noff = nt_new(self);
623 if (noff == -1)
618 if (noff == -1)
624 return -1;
619 return -1;
625 /* self->nt may have been changed by realloc */
620 /* self->nt may have been changed by realloc */
626 self->nt[off].children[k] = noff;
621 self->nt[off].children[k] = noff;
627 off = noff;
622 off = noff;
628 n = &self->nt[off];
623 n = &self->nt[off];
629 n->children[nt_level(oldnode, ++level)] = v;
624 n->children[nt_level(oldnode, ++level)] = v;
630 if (level > self->ntdepth)
625 if (level > self->ntdepth)
631 self->ntdepth = level;
626 self->ntdepth = level;
632 self->ntsplits += 1;
627 self->ntsplits += 1;
633 } else {
628 } else {
634 level += 1;
629 level += 1;
635 off = v;
630 off = v;
636 }
631 }
637 }
632 }
638
633
639 return -1;
634 return -1;
640 }
635 }
641
636
642 /*
637 /*
643 * Return values:
638 * Return values:
644 *
639 *
645 * -3: error (exception set)
640 * -3: error (exception set)
646 * -2: not found (no exception set)
641 * -2: not found (no exception set)
647 * rest: valid rev
642 * rest: valid rev
648 */
643 */
649 static int index_find_node(indexObject *self,
644 static int index_find_node(indexObject *self,
650 const char *node, Py_ssize_t nodelen)
645 const char *node, Py_ssize_t nodelen)
651 {
646 {
652 int rev;
647 int rev;
653
648
654 self->ntlookups++;
649 self->ntlookups++;
655 rev = nt_find(self, node, nodelen);
650 rev = nt_find(self, node, nodelen);
656 if (rev >= -1)
651 if (rev >= -1)
657 return rev;
652 return rev;
658
653
659 if (self->nt == NULL) {
654 if (self->nt == NULL) {
660 self->ntcapacity = self->raw_length < 4
655 self->ntcapacity = self->raw_length < 4
661 ? 4 : self->raw_length / 2;
656 ? 4 : self->raw_length / 2;
662 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
657 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
663 if (self->nt == NULL) {
658 if (self->nt == NULL) {
664 PyErr_SetString(PyExc_MemoryError, "out of memory");
659 PyErr_SetString(PyExc_MemoryError, "out of memory");
665 return -3;
660 return -3;
666 }
661 }
667 self->ntlength = 1;
662 self->ntlength = 1;
668 self->ntrev = (int)index_length(self) - 1;
663 self->ntrev = (int)index_length(self) - 1;
669 self->ntlookups = 1;
664 self->ntlookups = 1;
670 self->ntmisses = 0;
665 self->ntmisses = 0;
671 }
666 }
672
667
673 /*
668 /*
674 * For the first handful of lookups, we scan the entire index,
669 * For the first handful of lookups, we scan the entire index,
675 * and cache only the matching nodes. This optimizes for cases
670 * and cache only the matching nodes. This optimizes for cases
676 * like "hg tip", where only a few nodes are accessed.
671 * like "hg tip", where only a few nodes are accessed.
677 *
672 *
678 * After that, we cache every node we visit, using a single
673 * After that, we cache every node we visit, using a single
679 * scan amortized over multiple lookups. This gives the best
674 * scan amortized over multiple lookups. This gives the best
680 * bulk performance, e.g. for "hg log".
675 * bulk performance, e.g. for "hg log".
681 */
676 */
682 if (self->ntmisses++ < 4) {
677 if (self->ntmisses++ < 4) {
683 for (rev = self->ntrev - 1; rev >= 0; rev--) {
678 for (rev = self->ntrev - 1; rev >= 0; rev--) {
684 const char *n = index_node(self, rev);
679 const char *n = index_node(self, rev);
685 if (n == NULL)
680 if (n == NULL)
686 return -2;
681 return -2;
687 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
682 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
688 if (nt_insert(self, n, rev) == -1)
683 if (nt_insert(self, n, rev) == -1)
689 return -3;
684 return -3;
690 break;
685 break;
691 }
686 }
692 }
687 }
693 } else {
688 } else {
694 for (rev = self->ntrev - 1; rev >= 0; rev--) {
689 for (rev = self->ntrev - 1; rev >= 0; rev--) {
695 const char *n = index_node(self, rev);
690 const char *n = index_node(self, rev);
696 if (n == NULL)
691 if (n == NULL)
697 return -2;
692 return -2;
698 if (nt_insert(self, n, rev) == -1)
693 if (nt_insert(self, n, rev) == -1)
699 return -3;
694 return -3;
700 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
695 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
701 break;
696 break;
702 }
697 }
703 }
698 }
704 self->ntrev = rev;
699 self->ntrev = rev;
705 }
700 }
706
701
707 if (rev >= 0)
702 if (rev >= 0)
708 return rev;
703 return rev;
709 return -2;
704 return -2;
710 }
705 }
711
706
712 static PyObject *raise_revlog_error(void)
707 static PyObject *raise_revlog_error(void)
713 {
708 {
714 static PyObject *errclass;
709 static PyObject *errclass;
715 PyObject *mod = NULL, *errobj;
710 PyObject *mod = NULL, *errobj;
716
711
717 if (errclass == NULL) {
712 if (errclass == NULL) {
718 PyObject *dict;
713 PyObject *dict;
719
714
720 mod = PyImport_ImportModule("mercurial.error");
715 mod = PyImport_ImportModule("mercurial.error");
721 if (mod == NULL)
716 if (mod == NULL)
722 goto classfail;
717 goto classfail;
723
718
724 dict = PyModule_GetDict(mod);
719 dict = PyModule_GetDict(mod);
725 if (dict == NULL)
720 if (dict == NULL)
726 goto classfail;
721 goto classfail;
727
722
728 errclass = PyDict_GetItemString(dict, "RevlogError");
723 errclass = PyDict_GetItemString(dict, "RevlogError");
729 if (errclass == NULL) {
724 if (errclass == NULL) {
730 PyErr_SetString(PyExc_SystemError,
725 PyErr_SetString(PyExc_SystemError,
731 "could not find RevlogError");
726 "could not find RevlogError");
732 goto classfail;
727 goto classfail;
733 }
728 }
734 Py_INCREF(errclass);
729 Py_INCREF(errclass);
735 }
730 }
736
731
737 errobj = PyObject_CallFunction(errclass, NULL);
732 errobj = PyObject_CallFunction(errclass, NULL);
738 if (errobj == NULL)
733 if (errobj == NULL)
739 return NULL;
734 return NULL;
740 PyErr_SetObject(errclass, errobj);
735 PyErr_SetObject(errclass, errobj);
741 return errobj;
736 return errobj;
742
737
743 classfail:
738 classfail:
744 Py_XDECREF(mod);
739 Py_XDECREF(mod);
745 return NULL;
740 return NULL;
746 }
741 }
747
742
748 static PyObject *index_getitem(indexObject *self, PyObject *value)
743 static PyObject *index_getitem(indexObject *self, PyObject *value)
749 {
744 {
750 char *node;
745 char *node;
751 Py_ssize_t nodelen;
746 Py_ssize_t nodelen;
752 int rev;
747 int rev;
753
748
754 if (PyInt_Check(value))
749 if (PyInt_Check(value))
755 return index_get(self, PyInt_AS_LONG(value));
750 return index_get(self, PyInt_AS_LONG(value));
756
751
757 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
752 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
758 return NULL;
753 return NULL;
759 rev = index_find_node(self, node, nodelen);
754 rev = index_find_node(self, node, nodelen);
760 if (rev >= -1)
755 if (rev >= -1)
761 return PyInt_FromLong(rev);
756 return PyInt_FromLong(rev);
762 if (rev == -2)
757 if (rev == -2)
763 raise_revlog_error();
758 raise_revlog_error();
764 return NULL;
759 return NULL;
765 }
760 }
766
761
767 static PyObject *index_m_get(indexObject *self, PyObject *args)
762 static PyObject *index_m_get(indexObject *self, PyObject *args)
768 {
763 {
769 char *node;
764 char *node;
770 int nodelen, rev;
765 int nodelen, rev;
771
766
772 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
767 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
773 return NULL;
768 return NULL;
774
769
775 rev = index_find_node(self, node, nodelen);
770 rev = index_find_node(self, node, nodelen);
776 if (rev == -3)
771 if (rev == -3)
777 return NULL;
772 return NULL;
778 if (rev == -2)
773 if (rev == -2)
779 Py_RETURN_NONE;
774 Py_RETURN_NONE;
780 return PyInt_FromLong(rev);
775 return PyInt_FromLong(rev);
781 }
776 }
782
777
783 static int index_contains(indexObject *self, PyObject *value)
778 static int index_contains(indexObject *self, PyObject *value)
784 {
779 {
785 char *node;
780 char *node;
786 Py_ssize_t nodelen;
781 Py_ssize_t nodelen;
787
782
788 if (PyInt_Check(value)) {
783 if (PyInt_Check(value)) {
789 long rev = PyInt_AS_LONG(value);
784 long rev = PyInt_AS_LONG(value);
790 return rev >= -1 && rev < index_length(self);
785 return rev >= -1 && rev < index_length(self);
791 }
786 }
792
787
793 if (!PyString_Check(value))
788 if (!PyString_Check(value))
794 return 0;
789 return 0;
795
790
796 node = PyString_AS_STRING(value);
791 node = PyString_AS_STRING(value);
797 nodelen = PyString_GET_SIZE(value);
792 nodelen = PyString_GET_SIZE(value);
798
793
799 switch (index_find_node(self, node, nodelen)) {
794 switch (index_find_node(self, node, nodelen)) {
800 case -3:
795 case -3:
801 return -1;
796 return -1;
802 case -2:
797 case -2:
803 return 0;
798 return 0;
804 default:
799 default:
805 return 1;
800 return 1;
806 }
801 }
807 }
802 }
808
803
809 /*
804 /*
810 * Invalidate any trie entries introduced by added revs.
805 * Invalidate any trie entries introduced by added revs.
811 */
806 */
812 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
807 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
813 {
808 {
814 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
809 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
815
810
816 for (i = start; i < len; i++) {
811 for (i = start; i < len; i++) {
817 PyObject *tuple = PyList_GET_ITEM(self->added, i);
812 PyObject *tuple = PyList_GET_ITEM(self->added, i);
818 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
813 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
819
814
820 nt_insert(self, PyString_AS_STRING(node), -1);
815 nt_insert(self, PyString_AS_STRING(node), -1);
821 }
816 }
822
817
823 if (start == 0) {
818 if (start == 0) {
824 Py_DECREF(self->added);
819 Py_DECREF(self->added);
825 self->added = NULL;
820 self->added = NULL;
826 }
821 }
827 }
822 }
828
823
829 /*
824 /*
830 * Delete a numeric range of revs, which must be at the end of the
825 * Delete a numeric range of revs, which must be at the end of the
831 * range, but exclude the sentinel nullid entry.
826 * range, but exclude the sentinel nullid entry.
832 */
827 */
833 static int index_slice_del(indexObject *self, PyObject *item)
828 static int index_slice_del(indexObject *self, PyObject *item)
834 {
829 {
835 Py_ssize_t start, stop, step, slicelength;
830 Py_ssize_t start, stop, step, slicelength;
836 Py_ssize_t length = index_length(self);
831 Py_ssize_t length = index_length(self);
837
832
838 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
833 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
839 &start, &stop, &step, &slicelength) < 0)
834 &start, &stop, &step, &slicelength) < 0)
840 return -1;
835 return -1;
841
836
842 if (slicelength <= 0)
837 if (slicelength <= 0)
843 return 0;
838 return 0;
844
839
845 if ((step < 0 && start < stop) || (step > 0 && start > stop))
840 if ((step < 0 && start < stop) || (step > 0 && start > stop))
846 stop = start;
841 stop = start;
847
842
848 if (step < 0) {
843 if (step < 0) {
849 stop = start + 1;
844 stop = start + 1;
850 start = stop + step*(slicelength - 1) - 1;
845 start = stop + step*(slicelength - 1) - 1;
851 step = -step;
846 step = -step;
852 }
847 }
853
848
854 if (step != 1) {
849 if (step != 1) {
855 PyErr_SetString(PyExc_ValueError,
850 PyErr_SetString(PyExc_ValueError,
856 "revlog index delete requires step size of 1");
851 "revlog index delete requires step size of 1");
857 return -1;
852 return -1;
858 }
853 }
859
854
860 if (stop != length - 1) {
855 if (stop != length - 1) {
861 PyErr_SetString(PyExc_IndexError,
856 PyErr_SetString(PyExc_IndexError,
862 "revlog index deletion indices are invalid");
857 "revlog index deletion indices are invalid");
863 return -1;
858 return -1;
864 }
859 }
865
860
866 if (start < self->length - 1) {
861 if (start < self->length - 1) {
867 if (self->nt) {
862 if (self->nt) {
868 Py_ssize_t i;
863 Py_ssize_t i;
869
864
870 for (i = start + 1; i < self->length - 1; i++) {
865 for (i = start + 1; i < self->length - 1; i++) {
871 const char *node = index_node(self, i);
866 const char *node = index_node(self, i);
872
867
873 if (node)
868 if (node)
874 nt_insert(self, node, -1);
869 nt_insert(self, node, -1);
875 }
870 }
876 if (self->added)
871 if (self->added)
877 nt_invalidate_added(self, 0);
872 nt_invalidate_added(self, 0);
878 if (self->ntrev > start)
873 if (self->ntrev > start)
879 self->ntrev = (int)start;
874 self->ntrev = (int)start;
880 }
875 }
881 self->length = start + 1;
876 self->length = start + 1;
882 return 0;
877 return 0;
883 }
878 }
884
879
885 if (self->nt) {
880 if (self->nt) {
886 nt_invalidate_added(self, start - self->length + 1);
881 nt_invalidate_added(self, start - self->length + 1);
887 if (self->ntrev > start)
882 if (self->ntrev > start)
888 self->ntrev = (int)start;
883 self->ntrev = (int)start;
889 }
884 }
890 return self->added
885 return self->added
891 ? PyList_SetSlice(self->added, start - self->length + 1,
886 ? PyList_SetSlice(self->added, start - self->length + 1,
892 PyList_GET_SIZE(self->added), NULL)
887 PyList_GET_SIZE(self->added), NULL)
893 : 0;
888 : 0;
894 }
889 }
895
890
896 /*
891 /*
897 * Supported ops:
892 * Supported ops:
898 *
893 *
899 * slice deletion
894 * slice deletion
900 * string assignment (extend node->rev mapping)
895 * string assignment (extend node->rev mapping)
901 * string deletion (shrink node->rev mapping)
896 * string deletion (shrink node->rev mapping)
902 */
897 */
903 static int index_assign_subscript(indexObject *self, PyObject *item,
898 static int index_assign_subscript(indexObject *self, PyObject *item,
904 PyObject *value)
899 PyObject *value)
905 {
900 {
906 char *node;
901 char *node;
907 Py_ssize_t nodelen;
902 Py_ssize_t nodelen;
908 long rev;
903 long rev;
909
904
910 if (PySlice_Check(item) && value == NULL)
905 if (PySlice_Check(item) && value == NULL)
911 return index_slice_del(self, item);
906 return index_slice_del(self, item);
912
907
913 if (node_check(item, &node, &nodelen) == -1)
908 if (node_check(item, &node, &nodelen) == -1)
914 return -1;
909 return -1;
915
910
916 if (value == NULL)
911 if (value == NULL)
917 return self->nt ? nt_insert(self, node, -1) : 0;
912 return self->nt ? nt_insert(self, node, -1) : 0;
918 rev = PyInt_AsLong(value);
913 rev = PyInt_AsLong(value);
919 if (rev > INT_MAX || rev < 0) {
914 if (rev > INT_MAX || rev < 0) {
920 if (!PyErr_Occurred())
915 if (!PyErr_Occurred())
921 PyErr_SetString(PyExc_ValueError, "rev out of range");
916 PyErr_SetString(PyExc_ValueError, "rev out of range");
922 return -1;
917 return -1;
923 }
918 }
924 return nt_insert(self, node, (int)rev);
919 return nt_insert(self, node, (int)rev);
925 }
920 }
926
921
927 /*
922 /*
928 * Find all RevlogNG entries in an index that has inline data. Update
923 * Find all RevlogNG entries in an index that has inline data. Update
929 * the optional "offsets" table with those entries.
924 * the optional "offsets" table with those entries.
930 */
925 */
931 static long inline_scan(indexObject *self, const char **offsets)
926 static long inline_scan(indexObject *self, const char **offsets)
932 {
927 {
933 const char *data = PyString_AS_STRING(self->data);
928 const char *data = PyString_AS_STRING(self->data);
934 const char *end = data + PyString_GET_SIZE(self->data);
929 const char *end = data + PyString_GET_SIZE(self->data);
935 const long hdrsize = 64;
930 const long hdrsize = 64;
936 long incr = hdrsize;
931 long incr = hdrsize;
937 Py_ssize_t len = 0;
932 Py_ssize_t len = 0;
938
933
939 while (data + hdrsize <= end) {
934 while (data + hdrsize <= end) {
940 uint32_t comp_len;
935 uint32_t comp_len;
941 const char *old_data;
936 const char *old_data;
942 /* 3rd element of header is length of compressed inline data */
937 /* 3rd element of header is length of compressed inline data */
943 memcpy(&comp_len, data + 8, sizeof(uint32_t));
938 comp_len = getbe32(data + 8);
944 incr = hdrsize + ntohl(comp_len);
939 incr = hdrsize + comp_len;
945 if (incr < hdrsize)
940 if (incr < hdrsize)
946 break;
941 break;
947 if (offsets)
942 if (offsets)
948 offsets[len] = data;
943 offsets[len] = data;
949 len++;
944 len++;
950 old_data = data;
945 old_data = data;
951 data += incr;
946 data += incr;
952 if (data <= old_data)
947 if (data <= old_data)
953 break;
948 break;
954 }
949 }
955
950
956 if (data != end && data + hdrsize != end) {
951 if (data != end && data + hdrsize != end) {
957 if (!PyErr_Occurred())
952 if (!PyErr_Occurred())
958 PyErr_SetString(PyExc_ValueError, "corrupt index file");
953 PyErr_SetString(PyExc_ValueError, "corrupt index file");
959 return -1;
954 return -1;
960 }
955 }
961
956
962 return len;
957 return len;
963 }
958 }
964
959
965 static int index_real_init(indexObject *self, const char *data, int size,
960 static int index_real_init(indexObject *self, const char *data, int size,
966 PyObject *inlined_obj, PyObject *data_obj)
961 PyObject *inlined_obj, PyObject *data_obj)
967 {
962 {
968 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
963 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
969 self->data = data_obj;
964 self->data = data_obj;
970 self->cache = NULL;
965 self->cache = NULL;
971
966
972 self->added = NULL;
967 self->added = NULL;
973 self->offsets = NULL;
968 self->offsets = NULL;
974 self->nt = NULL;
969 self->nt = NULL;
975 self->ntlength = self->ntcapacity = 0;
970 self->ntlength = self->ntcapacity = 0;
976 self->ntdepth = self->ntsplits = 0;
971 self->ntdepth = self->ntsplits = 0;
977 self->ntlookups = self->ntmisses = 0;
972 self->ntlookups = self->ntmisses = 0;
978 self->ntrev = -1;
973 self->ntrev = -1;
979 Py_INCREF(self->data);
974 Py_INCREF(self->data);
980
975
981 if (self->inlined) {
976 if (self->inlined) {
982 long len = inline_scan(self, NULL);
977 long len = inline_scan(self, NULL);
983 if (len == -1)
978 if (len == -1)
984 goto bail;
979 goto bail;
985 self->raw_length = len;
980 self->raw_length = len;
986 self->length = len + 1;
981 self->length = len + 1;
987 } else {
982 } else {
988 if (size % 64) {
983 if (size % 64) {
989 PyErr_SetString(PyExc_ValueError, "corrupt index file");
984 PyErr_SetString(PyExc_ValueError, "corrupt index file");
990 goto bail;
985 goto bail;
991 }
986 }
992 self->raw_length = size / 64;
987 self->raw_length = size / 64;
993 self->length = self->raw_length + 1;
988 self->length = self->raw_length + 1;
994 }
989 }
995
990
996 return 0;
991 return 0;
997 bail:
992 bail:
998 return -1;
993 return -1;
999 }
994 }
1000
995
1001 static int index_init(indexObject *self, PyObject *args, PyObject *kwds)
996 static int index_init(indexObject *self, PyObject *args, PyObject *kwds)
1002 {
997 {
1003 const char *data;
998 const char *data;
1004 int size;
999 int size;
1005 PyObject *inlined_obj;
1000 PyObject *inlined_obj;
1006
1001
1007 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
1002 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
1008 return -1;
1003 return -1;
1009
1004
1010 return index_real_init(self, data, size, inlined_obj,
1005 return index_real_init(self, data, size, inlined_obj,
1011 PyTuple_GET_ITEM(args, 0));
1006 PyTuple_GET_ITEM(args, 0));
1012 }
1007 }
1013
1008
1014 static PyObject *index_nodemap(indexObject *self)
1009 static PyObject *index_nodemap(indexObject *self)
1015 {
1010 {
1016 return (PyObject *)self;
1011 return (PyObject *)self;
1017 }
1012 }
1018
1013
1019 static void index_dealloc(indexObject *self)
1014 static void index_dealloc(indexObject *self)
1020 {
1015 {
1021 _index_clearcaches(self);
1016 _index_clearcaches(self);
1022 Py_DECREF(self->data);
1017 Py_DECREF(self->data);
1023 Py_XDECREF(self->added);
1018 Py_XDECREF(self->added);
1024 PyObject_Del(self);
1019 PyObject_Del(self);
1025 }
1020 }
1026
1021
1027 static PySequenceMethods index_sequence_methods = {
1022 static PySequenceMethods index_sequence_methods = {
1028 (lenfunc)index_length, /* sq_length */
1023 (lenfunc)index_length, /* sq_length */
1029 0, /* sq_concat */
1024 0, /* sq_concat */
1030 0, /* sq_repeat */
1025 0, /* sq_repeat */
1031 (ssizeargfunc)index_get, /* sq_item */
1026 (ssizeargfunc)index_get, /* sq_item */
1032 0, /* sq_slice */
1027 0, /* sq_slice */
1033 0, /* sq_ass_item */
1028 0, /* sq_ass_item */
1034 0, /* sq_ass_slice */
1029 0, /* sq_ass_slice */
1035 (objobjproc)index_contains, /* sq_contains */
1030 (objobjproc)index_contains, /* sq_contains */
1036 };
1031 };
1037
1032
1038 static PyMappingMethods index_mapping_methods = {
1033 static PyMappingMethods index_mapping_methods = {
1039 (lenfunc)index_length, /* mp_length */
1034 (lenfunc)index_length, /* mp_length */
1040 (binaryfunc)index_getitem, /* mp_subscript */
1035 (binaryfunc)index_getitem, /* mp_subscript */
1041 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1036 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1042 };
1037 };
1043
1038
1044 static PyMethodDef index_methods[] = {
1039 static PyMethodDef index_methods[] = {
1045 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1040 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1046 "clear the index caches"},
1041 "clear the index caches"},
1047 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1042 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1048 "get an index entry"},
1043 "get an index entry"},
1049 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1044 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1050 "insert an index entry"},
1045 "insert an index entry"},
1051 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1046 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1052 "stats for the index"},
1047 "stats for the index"},
1053 {NULL} /* Sentinel */
1048 {NULL} /* Sentinel */
1054 };
1049 };
1055
1050
1056 static PyGetSetDef index_getset[] = {
1051 static PyGetSetDef index_getset[] = {
1057 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1052 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1058 {NULL} /* Sentinel */
1053 {NULL} /* Sentinel */
1059 };
1054 };
1060
1055
1061 static PyTypeObject indexType = {
1056 static PyTypeObject indexType = {
1062 PyObject_HEAD_INIT(NULL)
1057 PyObject_HEAD_INIT(NULL)
1063 0, /* ob_size */
1058 0, /* ob_size */
1064 "parsers.index", /* tp_name */
1059 "parsers.index", /* tp_name */
1065 sizeof(indexObject), /* tp_basicsize */
1060 sizeof(indexObject), /* tp_basicsize */
1066 0, /* tp_itemsize */
1061 0, /* tp_itemsize */
1067 (destructor)index_dealloc, /* tp_dealloc */
1062 (destructor)index_dealloc, /* tp_dealloc */
1068 0, /* tp_print */
1063 0, /* tp_print */
1069 0, /* tp_getattr */
1064 0, /* tp_getattr */
1070 0, /* tp_setattr */
1065 0, /* tp_setattr */
1071 0, /* tp_compare */
1066 0, /* tp_compare */
1072 0, /* tp_repr */
1067 0, /* tp_repr */
1073 0, /* tp_as_number */
1068 0, /* tp_as_number */
1074 &index_sequence_methods, /* tp_as_sequence */
1069 &index_sequence_methods, /* tp_as_sequence */
1075 &index_mapping_methods, /* tp_as_mapping */
1070 &index_mapping_methods, /* tp_as_mapping */
1076 0, /* tp_hash */
1071 0, /* tp_hash */
1077 0, /* tp_call */
1072 0, /* tp_call */
1078 0, /* tp_str */
1073 0, /* tp_str */
1079 0, /* tp_getattro */
1074 0, /* tp_getattro */
1080 0, /* tp_setattro */
1075 0, /* tp_setattro */
1081 0, /* tp_as_buffer */
1076 0, /* tp_as_buffer */
1082 Py_TPFLAGS_DEFAULT, /* tp_flags */
1077 Py_TPFLAGS_DEFAULT, /* tp_flags */
1083 "revlog index", /* tp_doc */
1078 "revlog index", /* tp_doc */
1084 0, /* tp_traverse */
1079 0, /* tp_traverse */
1085 0, /* tp_clear */
1080 0, /* tp_clear */
1086 0, /* tp_richcompare */
1081 0, /* tp_richcompare */
1087 0, /* tp_weaklistoffset */
1082 0, /* tp_weaklistoffset */
1088 0, /* tp_iter */
1083 0, /* tp_iter */
1089 0, /* tp_iternext */
1084 0, /* tp_iternext */
1090 index_methods, /* tp_methods */
1085 index_methods, /* tp_methods */
1091 0, /* tp_members */
1086 0, /* tp_members */
1092 index_getset, /* tp_getset */
1087 index_getset, /* tp_getset */
1093 0, /* tp_base */
1088 0, /* tp_base */
1094 0, /* tp_dict */
1089 0, /* tp_dict */
1095 0, /* tp_descr_get */
1090 0, /* tp_descr_get */
1096 0, /* tp_descr_set */
1091 0, /* tp_descr_set */
1097 0, /* tp_dictoffset */
1092 0, /* tp_dictoffset */
1098 (initproc)index_init, /* tp_init */
1093 (initproc)index_init, /* tp_init */
1099 0, /* tp_alloc */
1094 0, /* tp_alloc */
1100 PyType_GenericNew, /* tp_new */
1095 PyType_GenericNew, /* tp_new */
1101 };
1096 };
1102
1097
1103 /*
1098 /*
1104 * returns a tuple of the form (index, index, cache) with elements as
1099 * returns a tuple of the form (index, index, cache) with elements as
1105 * follows:
1100 * follows:
1106 *
1101 *
1107 * index: an index object that lazily parses RevlogNG records
1102 * index: an index object that lazily parses RevlogNG records
1108 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1103 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1109 *
1104 *
1110 * added complications are for backwards compatibility
1105 * added complications are for backwards compatibility
1111 */
1106 */
1112 static PyObject *parse_index2(PyObject *self, PyObject *args)
1107 static PyObject *parse_index2(PyObject *self, PyObject *args)
1113 {
1108 {
1114 const char *data;
1109 const char *data;
1115 int size, ret;
1110 int size, ret;
1116 PyObject *inlined_obj, *tuple = NULL, *cache = NULL;
1111 PyObject *inlined_obj, *tuple = NULL, *cache = NULL;
1117 indexObject *idx;
1112 indexObject *idx;
1118
1113
1119 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
1114 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
1120 return NULL;
1115 return NULL;
1121
1116
1122 idx = PyObject_New(indexObject, &indexType);
1117 idx = PyObject_New(indexObject, &indexType);
1123
1118
1124 if (idx == NULL)
1119 if (idx == NULL)
1125 goto bail;
1120 goto bail;
1126
1121
1127 ret = index_real_init(idx, data, size, inlined_obj,
1122 ret = index_real_init(idx, data, size, inlined_obj,
1128 PyTuple_GET_ITEM(args, 0));
1123 PyTuple_GET_ITEM(args, 0));
1129 if (ret)
1124 if (ret)
1130 goto bail;
1125 goto bail;
1131
1126
1132 if (idx->inlined) {
1127 if (idx->inlined) {
1133 Py_INCREF(idx->data);
1128 Py_INCREF(idx->data);
1134 cache = Py_BuildValue("iO", 0, idx->data);
1129 cache = Py_BuildValue("iO", 0, idx->data);
1135 if (cache == NULL)
1130 if (cache == NULL)
1136 goto bail;
1131 goto bail;
1137 } else {
1132 } else {
1138 cache = Py_None;
1133 cache = Py_None;
1139 Py_INCREF(cache);
1134 Py_INCREF(cache);
1140 }
1135 }
1141
1136
1142 Py_INCREF(idx);
1137 Py_INCREF(idx);
1143
1138
1144 tuple = Py_BuildValue("NN", idx, cache);
1139 tuple = Py_BuildValue("NN", idx, cache);
1145 if (!tuple)
1140 if (!tuple)
1146 goto bail;
1141 goto bail;
1147 return tuple;
1142 return tuple;
1148
1143
1149 bail:
1144 bail:
1150 Py_XDECREF(idx);
1145 Py_XDECREF(idx);
1151 Py_XDECREF(cache);
1146 Py_XDECREF(cache);
1152 Py_XDECREF(tuple);
1147 Py_XDECREF(tuple);
1153 return NULL;
1148 return NULL;
1154 }
1149 }
1155
1150
1156 static char parsers_doc[] = "Efficient content parsing.";
1151 static char parsers_doc[] = "Efficient content parsing.";
1157
1152
1158 static PyMethodDef methods[] = {
1153 static PyMethodDef methods[] = {
1159 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1154 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1160 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1155 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1161 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1156 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1162 {NULL, NULL}
1157 {NULL, NULL}
1163 };
1158 };
1164
1159
1165 static void module_init(PyObject *mod)
1160 static void module_init(PyObject *mod)
1166 {
1161 {
1167 if (PyType_Ready(&indexType) < 0)
1162 if (PyType_Ready(&indexType) < 0)
1168 return;
1163 return;
1169 Py_INCREF(&indexType);
1164 Py_INCREF(&indexType);
1170
1165
1171 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1166 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1172
1167
1173 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1168 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1174 -1, -1, -1, -1, nullid, 20);
1169 -1, -1, -1, -1, nullid, 20);
1175 if (nullentry)
1170 if (nullentry)
1176 PyObject_GC_UnTrack(nullentry);
1171 PyObject_GC_UnTrack(nullentry);
1177 }
1172 }
1178
1173
1179 #ifdef IS_PY3K
1174 #ifdef IS_PY3K
1180 static struct PyModuleDef parsers_module = {
1175 static struct PyModuleDef parsers_module = {
1181 PyModuleDef_HEAD_INIT,
1176 PyModuleDef_HEAD_INIT,
1182 "parsers",
1177 "parsers",
1183 parsers_doc,
1178 parsers_doc,
1184 -1,
1179 -1,
1185 methods
1180 methods
1186 };
1181 };
1187
1182
1188 PyMODINIT_FUNC PyInit_parsers(void)
1183 PyMODINIT_FUNC PyInit_parsers(void)
1189 {
1184 {
1190 PyObject *mod = PyModule_Create(&parsers_module);
1185 PyObject *mod = PyModule_Create(&parsers_module);
1191 module_init(mod);
1186 module_init(mod);
1192 return mod;
1187 return mod;
1193 }
1188 }
1194 #else
1189 #else
1195 PyMODINIT_FUNC initparsers(void)
1190 PyMODINIT_FUNC initparsers(void)
1196 {
1191 {
1197 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1192 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1198 module_init(mod);
1193 module_init(mod);
1199 }
1194 }
1200 #endif
1195 #endif
@@ -1,154 +1,165
1 /*
1 /*
2 util.h - utility functions for interfacing with the various python APIs.
2 util.h - utility functions for interfacing with the various python APIs.
3
3
4 This software may be used and distributed according to the terms of
4 This software may be used and distributed according to the terms of
5 the GNU General Public License, incorporated herein by reference.
5 the GNU General Public License, incorporated herein by reference.
6 */
6 */
7
7
8 #ifndef _HG_UTIL_H_
8 #ifndef _HG_UTIL_H_
9 #define _HG_UTIL_H_
9 #define _HG_UTIL_H_
10
10
11 #if PY_MAJOR_VERSION >= 3
11 #if PY_MAJOR_VERSION >= 3
12
12
13 #define IS_PY3K
13 #define IS_PY3K
14 #define PyInt_FromLong PyLong_FromLong
14 #define PyInt_FromLong PyLong_FromLong
15 #define PyInt_AsLong PyLong_AsLong
15 #define PyInt_AsLong PyLong_AsLong
16
16
17 /*
17 /*
18 Mapping of some of the python < 2.x PyString* functions to py3k's PyUnicode.
18 Mapping of some of the python < 2.x PyString* functions to py3k's PyUnicode.
19
19
20 The commented names below represent those that are present in the PyBytes
20 The commented names below represent those that are present in the PyBytes
21 definitions for python < 2.6 (below in this file) that don't have a direct
21 definitions for python < 2.6 (below in this file) that don't have a direct
22 implementation.
22 implementation.
23 */
23 */
24
24
25 #define PyStringObject PyUnicodeObject
25 #define PyStringObject PyUnicodeObject
26 #define PyString_Type PyUnicode_Type
26 #define PyString_Type PyUnicode_Type
27
27
28 #define PyString_Check PyUnicode_Check
28 #define PyString_Check PyUnicode_Check
29 #define PyString_CheckExact PyUnicode_CheckExact
29 #define PyString_CheckExact PyUnicode_CheckExact
30 #define PyString_CHECK_INTERNED PyUnicode_CHECK_INTERNED
30 #define PyString_CHECK_INTERNED PyUnicode_CHECK_INTERNED
31 #define PyString_AS_STRING PyUnicode_AsLatin1String
31 #define PyString_AS_STRING PyUnicode_AsLatin1String
32 #define PyString_GET_SIZE PyUnicode_GET_SIZE
32 #define PyString_GET_SIZE PyUnicode_GET_SIZE
33
33
34 #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
34 #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
35 #define PyString_FromString PyUnicode_FromString
35 #define PyString_FromString PyUnicode_FromString
36 #define PyString_FromFormatV PyUnicode_FromFormatV
36 #define PyString_FromFormatV PyUnicode_FromFormatV
37 #define PyString_FromFormat PyUnicode_FromFormat
37 #define PyString_FromFormat PyUnicode_FromFormat
38 /* #define PyString_Size PyUnicode_GET_SIZE */
38 /* #define PyString_Size PyUnicode_GET_SIZE */
39 /* #define PyString_AsString */
39 /* #define PyString_AsString */
40 /* #define PyString_Repr */
40 /* #define PyString_Repr */
41 #define PyString_Concat PyUnicode_Concat
41 #define PyString_Concat PyUnicode_Concat
42 #define PyString_ConcatAndDel PyUnicode_AppendAndDel
42 #define PyString_ConcatAndDel PyUnicode_AppendAndDel
43 #define _PyString_Resize PyUnicode_Resize
43 #define _PyString_Resize PyUnicode_Resize
44 /* #define _PyString_Eq */
44 /* #define _PyString_Eq */
45 #define PyString_Format PyUnicode_Format
45 #define PyString_Format PyUnicode_Format
46 /* #define _PyString_FormatLong */
46 /* #define _PyString_FormatLong */
47 /* #define PyString_DecodeEscape */
47 /* #define PyString_DecodeEscape */
48 #define _PyString_Join PyUnicode_Join
48 #define _PyString_Join PyUnicode_Join
49 #define PyString_Decode PyUnicode_Decode
49 #define PyString_Decode PyUnicode_Decode
50 #define PyString_Encode PyUnicode_Encode
50 #define PyString_Encode PyUnicode_Encode
51 #define PyString_AsEncodedObject PyUnicode_AsEncodedObject
51 #define PyString_AsEncodedObject PyUnicode_AsEncodedObject
52 #define PyString_AsEncodedString PyUnicode_AsEncodedString
52 #define PyString_AsEncodedString PyUnicode_AsEncodedString
53 #define PyString_AsDecodedObject PyUnicode_AsDecodedObject
53 #define PyString_AsDecodedObject PyUnicode_AsDecodedObject
54 #define PyString_AsDecodedString PyUnicode_AsDecodedUnicode
54 #define PyString_AsDecodedString PyUnicode_AsDecodedUnicode
55 /* #define PyString_AsStringAndSize */
55 /* #define PyString_AsStringAndSize */
56 #define _PyString_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping
56 #define _PyString_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping
57
57
58 #endif /* PY_MAJOR_VERSION */
58 #endif /* PY_MAJOR_VERSION */
59
59
60 /* Backports from 2.6 */
60 /* Backports from 2.6 */
61 #if PY_VERSION_HEX < 0x02060000
61 #if PY_VERSION_HEX < 0x02060000
62
62
63 #define Py_TYPE(ob) (ob)->ob_type
63 #define Py_TYPE(ob) (ob)->ob_type
64 #define Py_SIZE(ob) (ob)->ob_size
64 #define Py_SIZE(ob) (ob)->ob_size
65 #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
65 #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
66
66
67 /* Shamelessly stolen from bytesobject.h */
67 /* Shamelessly stolen from bytesobject.h */
68 #define PyBytesObject PyStringObject
68 #define PyBytesObject PyStringObject
69 #define PyBytes_Type PyString_Type
69 #define PyBytes_Type PyString_Type
70
70
71 #define PyBytes_Check PyString_Check
71 #define PyBytes_Check PyString_Check
72 #define PyBytes_CheckExact PyString_CheckExact
72 #define PyBytes_CheckExact PyString_CheckExact
73 #define PyBytes_CHECK_INTERNED PyString_CHECK_INTERNED
73 #define PyBytes_CHECK_INTERNED PyString_CHECK_INTERNED
74 #define PyBytes_AS_STRING PyString_AS_STRING
74 #define PyBytes_AS_STRING PyString_AS_STRING
75 #define PyBytes_GET_SIZE PyString_GET_SIZE
75 #define PyBytes_GET_SIZE PyString_GET_SIZE
76 #define Py_TPFLAGS_BYTES_SUBCLASS Py_TPFLAGS_STRING_SUBCLASS
76 #define Py_TPFLAGS_BYTES_SUBCLASS Py_TPFLAGS_STRING_SUBCLASS
77
77
78 #define PyBytes_FromStringAndSize PyString_FromStringAndSize
78 #define PyBytes_FromStringAndSize PyString_FromStringAndSize
79 #define PyBytes_FromString PyString_FromString
79 #define PyBytes_FromString PyString_FromString
80 #define PyBytes_FromFormatV PyString_FromFormatV
80 #define PyBytes_FromFormatV PyString_FromFormatV
81 #define PyBytes_FromFormat PyString_FromFormat
81 #define PyBytes_FromFormat PyString_FromFormat
82 #define PyBytes_Size PyString_Size
82 #define PyBytes_Size PyString_Size
83 #define PyBytes_AsString PyString_AsString
83 #define PyBytes_AsString PyString_AsString
84 #define PyBytes_Repr PyString_Repr
84 #define PyBytes_Repr PyString_Repr
85 #define PyBytes_Concat PyString_Concat
85 #define PyBytes_Concat PyString_Concat
86 #define PyBytes_ConcatAndDel PyString_ConcatAndDel
86 #define PyBytes_ConcatAndDel PyString_ConcatAndDel
87 #define _PyBytes_Resize _PyString_Resize
87 #define _PyBytes_Resize _PyString_Resize
88 #define _PyBytes_Eq _PyString_Eq
88 #define _PyBytes_Eq _PyString_Eq
89 #define PyBytes_Format PyString_Format
89 #define PyBytes_Format PyString_Format
90 #define _PyBytes_FormatLong _PyString_FormatLong
90 #define _PyBytes_FormatLong _PyString_FormatLong
91 #define PyBytes_DecodeEscape PyString_DecodeEscape
91 #define PyBytes_DecodeEscape PyString_DecodeEscape
92 #define _PyBytes_Join _PyString_Join
92 #define _PyBytes_Join _PyString_Join
93 #define PyBytes_Decode PyString_Decode
93 #define PyBytes_Decode PyString_Decode
94 #define PyBytes_Encode PyString_Encode
94 #define PyBytes_Encode PyString_Encode
95 #define PyBytes_AsEncodedObject PyString_AsEncodedObject
95 #define PyBytes_AsEncodedObject PyString_AsEncodedObject
96 #define PyBytes_AsEncodedString PyString_AsEncodedString
96 #define PyBytes_AsEncodedString PyString_AsEncodedString
97 #define PyBytes_AsDecodedObject PyString_AsDecodedObject
97 #define PyBytes_AsDecodedObject PyString_AsDecodedObject
98 #define PyBytes_AsDecodedString PyString_AsDecodedString
98 #define PyBytes_AsDecodedString PyString_AsDecodedString
99 #define PyBytes_AsStringAndSize PyString_AsStringAndSize
99 #define PyBytes_AsStringAndSize PyString_AsStringAndSize
100 #define _PyBytes_InsertThousandsGrouping _PyString_InsertThousandsGrouping
100 #define _PyBytes_InsertThousandsGrouping _PyString_InsertThousandsGrouping
101
101
102 #endif /* PY_VERSION_HEX */
102 #endif /* PY_VERSION_HEX */
103
103
104 #if (PY_VERSION_HEX < 0x02050000)
104 #if (PY_VERSION_HEX < 0x02050000)
105 /* Definitions to get compatibility with python 2.4 and earlier which
105 /* Definitions to get compatibility with python 2.4 and earlier which
106 does not have Py_ssize_t. See also PEP 353.
106 does not have Py_ssize_t. See also PEP 353.
107 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
107 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
108 */
108 */
109 typedef int Py_ssize_t;
109 typedef int Py_ssize_t;
110 typedef Py_ssize_t (*lenfunc)(PyObject *);
110 typedef Py_ssize_t (*lenfunc)(PyObject *);
111 typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t);
111 typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t);
112
112
113 #if !defined(PY_SSIZE_T_MIN)
113 #if !defined(PY_SSIZE_T_MIN)
114 #define PY_SSIZE_T_MAX INT_MAX
114 #define PY_SSIZE_T_MAX INT_MAX
115 #define PY_SSIZE_T_MIN INT_MIN
115 #define PY_SSIZE_T_MIN INT_MIN
116 #endif
116 #endif
117 #endif
117 #endif
118
118
119 #ifdef _WIN32
119 #ifdef _WIN32
120 #ifdef _MSC_VER
120 #ifdef _MSC_VER
121 /* msvc 6.0 has problems */
121 /* msvc 6.0 has problems */
122 #define inline __inline
122 #define inline __inline
123 typedef unsigned long uint32_t;
123 typedef unsigned long uint32_t;
124 typedef unsigned __int64 uint64_t;
124 typedef unsigned __int64 uint64_t;
125 #else
125 #else
126 #include <stdint.h>
126 #include <stdint.h>
127 #endif
127 #endif
128 static uint32_t ntohl(uint32_t x)
129 {
130 return ((x & 0x000000ffUL) << 24) |
131 ((x & 0x0000ff00UL) << 8) |
132 ((x & 0x00ff0000UL) >> 8) |
133 ((x & 0xff000000UL) >> 24);
134 }
135 #else
128 #else
136 /* not windows */
129 /* not windows */
137 #include <sys/types.h>
130 #include <sys/types.h>
138 #if defined __BEOS__ && !defined __HAIKU__
131 #if defined __BEOS__ && !defined __HAIKU__
139 #include <ByteOrder.h>
132 #include <ByteOrder.h>
140 #else
133 #else
141 #include <arpa/inet.h>
134 #include <arpa/inet.h>
142 #endif
135 #endif
143 #include <inttypes.h>
136 #include <inttypes.h>
144 #endif
137 #endif
145
138
146 #if defined __hpux || defined __SUNPRO_C || defined _AIX
139 #if defined __hpux || defined __SUNPRO_C || defined _AIX
147 #define inline
140 #define inline
148 #endif
141 #endif
149
142
150 #ifdef __linux
143 #ifdef __linux
151 #define inline __inline
144 #define inline __inline
152 #endif
145 #endif
153
146
147 static inline uint32_t getbe32(const char *c)
148 {
149 const unsigned char *d = (const unsigned char *)c;
150
151 return ((d[0] << 24) |
152 (d[1] << 16) |
153 (d[2] << 8) |
154 (d[3]));
155 }
156
157 static inline void putbe32(uint32_t x, char *c)
158 {
159 c[0] = (x >> 24) & 0xff;
160 c[1] = (x >> 16) & 0xff;
161 c[2] = (x >> 8) & 0xff;
162 c[3] = (x) & 0xff;
163 }
164
154 #endif /* _HG_UTIL_H_ */
165 #endif /* _HG_UTIL_H_ */
General Comments 0
You need to be logged in to leave comments. Login now