##// END OF EJS Templates
Teach bdiff to support buffer objects...
Brendan Cully -
r3335:9061613c default
parent child Browse files
Show More
@@ -1,371 +1,373
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15
15
16 #ifdef __hpux
16 #ifdef __hpux
17 #define inline
17 #define inline
18 #endif
18 #endif
19
19
20 #ifdef __SUNPRO_C
20 #ifdef __SUNPRO_C
21 # define inline
21 # define inline
22 #endif
22 #endif
23
23
24 #ifdef _WIN32
24 #ifdef _WIN32
25 #ifdef _MSC_VER
25 #ifdef _MSC_VER
26 #define inline __inline
26 #define inline __inline
27 typedef unsigned long uint32_t;
27 typedef unsigned long uint32_t;
28 #else
28 #else
29 #include <stdint.h>
29 #include <stdint.h>
30 #endif
30 #endif
31 static uint32_t htonl(uint32_t x)
31 static uint32_t htonl(uint32_t x)
32 {
32 {
33 return ((x & 0x000000ffUL) << 24) |
33 return ((x & 0x000000ffUL) << 24) |
34 ((x & 0x0000ff00UL) << 8) |
34 ((x & 0x0000ff00UL) << 8) |
35 ((x & 0x00ff0000UL) >> 8) |
35 ((x & 0x00ff0000UL) >> 8) |
36 ((x & 0xff000000UL) >> 24);
36 ((x & 0xff000000UL) >> 24);
37 }
37 }
38 #else
38 #else
39 #include <sys/types.h>
39 #include <sys/types.h>
40 #include <arpa/inet.h>
40 #include <arpa/inet.h>
41 #include <inttypes.h>
41 #include <inttypes.h>
42 #endif
42 #endif
43
43
44 struct line {
44 struct line {
45 int h, len, n, e;
45 int h, len, n, e;
46 const char *l;
46 const char *l;
47 };
47 };
48
48
49 struct pos {
49 struct pos {
50 int pos, len;
50 int pos, len;
51 };
51 };
52
52
53 struct hunk {
53 struct hunk {
54 int a1, a2, b1, b2;
54 int a1, a2, b1, b2;
55 };
55 };
56
56
57 struct hunklist {
57 struct hunklist {
58 struct hunk *base, *head;
58 struct hunk *base, *head;
59 };
59 };
60
60
61 static inline uint32_t rol32(uint32_t word, unsigned int shift)
61 static inline uint32_t rol32(uint32_t word, unsigned int shift)
62 {
62 {
63 return (word << shift) | (word >> (32 - shift));
63 return (word << shift) | (word >> (32 - shift));
64 }
64 }
65
65
66 int splitlines(const char *a, int len, struct line **lr)
66 int splitlines(const char *a, int len, struct line **lr)
67 {
67 {
68 int g, h, i;
68 int g, h, i;
69 const char *p, *b = a;
69 const char *p, *b = a;
70 struct line *l;
70 struct line *l;
71
71
72 /* count the lines */
72 /* count the lines */
73 i = 1; /* extra line for sentinel */
73 i = 1; /* extra line for sentinel */
74 for (p = a; p < a + len; p++)
74 for (p = a; p < a + len; p++)
75 if (*p == '\n' || p == a + len - 1)
75 if (*p == '\n' || p == a + len - 1)
76 i++;
76 i++;
77
77
78 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
78 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
79 if (!l)
79 if (!l)
80 return -1;
80 return -1;
81
81
82 /* build the line array and calculate hashes */
82 /* build the line array and calculate hashes */
83 h = 0;
83 h = 0;
84 for (p = a; p < a + len; p++) {
84 for (p = a; p < a + len; p++) {
85 /*
85 /*
86 * a simple hash from GNU diff, with better collision
86 * a simple hash from GNU diff, with better collision
87 * resistance from hashpjw. this slows down common
87 * resistance from hashpjw. this slows down common
88 * case by 10%, but speeds up worst case by 100x.
88 * case by 10%, but speeds up worst case by 100x.
89 */
89 */
90 h = *p + rol32(h, 7);
90 h = *p + rol32(h, 7);
91 if ((g = h & 0xf0000000)) {
91 if ((g = h & 0xf0000000)) {
92 h ^= g >> 24;
92 h ^= g >> 24;
93 h ^= g;
93 h ^= g;
94 }
94 }
95 if (*p == '\n' || p == a + len - 1) {
95 if (*p == '\n' || p == a + len - 1) {
96 l->len = p - b + 1;
96 l->len = p - b + 1;
97 l->h = h * l->len;
97 l->h = h * l->len;
98 l->l = b;
98 l->l = b;
99 l->n = -1;
99 l->n = -1;
100 l++;
100 l++;
101 b = p + 1;
101 b = p + 1;
102 h = 0;
102 h = 0;
103 }
103 }
104 }
104 }
105
105
106 /* set up a sentinel */
106 /* set up a sentinel */
107 l->h = l->len = 0;
107 l->h = l->len = 0;
108 l->l = a + len;
108 l->l = a + len;
109 return i - 1;
109 return i - 1;
110 }
110 }
111
111
112 int inline cmp(struct line *a, struct line *b)
112 int inline cmp(struct line *a, struct line *b)
113 {
113 {
114 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
114 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
115 }
115 }
116
116
117 static int equatelines(struct line *a, int an, struct line *b, int bn)
117 static int equatelines(struct line *a, int an, struct line *b, int bn)
118 {
118 {
119 int i, j, buckets = 1, t;
119 int i, j, buckets = 1, t;
120 struct pos *h;
120 struct pos *h;
121
121
122 /* build a hash table of the next highest power of 2 */
122 /* build a hash table of the next highest power of 2 */
123 while (buckets < bn + 1)
123 while (buckets < bn + 1)
124 buckets *= 2;
124 buckets *= 2;
125
125
126 h = (struct pos *)malloc(buckets * sizeof(struct pos));
126 h = (struct pos *)malloc(buckets * sizeof(struct pos));
127 buckets = buckets - 1;
127 buckets = buckets - 1;
128 if (!h)
128 if (!h)
129 return 0;
129 return 0;
130
130
131 /* clear the hash table */
131 /* clear the hash table */
132 for (i = 0; i <= buckets; i++) {
132 for (i = 0; i <= buckets; i++) {
133 h[i].pos = -1;
133 h[i].pos = -1;
134 h[i].len = 0;
134 h[i].len = 0;
135 }
135 }
136
136
137 /* add lines to the hash table chains */
137 /* add lines to the hash table chains */
138 for (i = bn - 1; i >= 0; i--) {
138 for (i = bn - 1; i >= 0; i--) {
139 /* find the equivalence class */
139 /* find the equivalence class */
140 for (j = b[i].h & buckets; h[j].pos != -1;
140 for (j = b[i].h & buckets; h[j].pos != -1;
141 j = (j + 1) & buckets)
141 j = (j + 1) & buckets)
142 if (!cmp(b + i, b + h[j].pos))
142 if (!cmp(b + i, b + h[j].pos))
143 break;
143 break;
144
144
145 /* add to the head of the equivalence class */
145 /* add to the head of the equivalence class */
146 b[i].n = h[j].pos;
146 b[i].n = h[j].pos;
147 b[i].e = j;
147 b[i].e = j;
148 h[j].pos = i;
148 h[j].pos = i;
149 h[j].len++; /* keep track of popularity */
149 h[j].len++; /* keep track of popularity */
150 }
150 }
151
151
152 /* compute popularity threshold */
152 /* compute popularity threshold */
153 t = (bn >= 200) ? bn / 100 : bn + 1;
153 t = (bn >= 200) ? bn / 100 : bn + 1;
154
154
155 /* match items in a to their equivalence class in b */
155 /* match items in a to their equivalence class in b */
156 for (i = 0; i < an; i++) {
156 for (i = 0; i < an; i++) {
157 /* find the equivalence class */
157 /* find the equivalence class */
158 for (j = a[i].h & buckets; h[j].pos != -1;
158 for (j = a[i].h & buckets; h[j].pos != -1;
159 j = (j + 1) & buckets)
159 j = (j + 1) & buckets)
160 if (!cmp(a + i, b + h[j].pos))
160 if (!cmp(a + i, b + h[j].pos))
161 break;
161 break;
162
162
163 a[i].e = j; /* use equivalence class for quick compare */
163 a[i].e = j; /* use equivalence class for quick compare */
164 if (h[j].len <= t)
164 if (h[j].len <= t)
165 a[i].n = h[j].pos; /* point to head of match list */
165 a[i].n = h[j].pos; /* point to head of match list */
166 else
166 else
167 a[i].n = -1; /* too popular */
167 a[i].n = -1; /* too popular */
168 }
168 }
169
169
170 /* discard hash tables */
170 /* discard hash tables */
171 free(h);
171 free(h);
172 return 1;
172 return 1;
173 }
173 }
174
174
175 static int longest_match(struct line *a, struct line *b, struct pos *pos,
175 static int longest_match(struct line *a, struct line *b, struct pos *pos,
176 int a1, int a2, int b1, int b2, int *omi, int *omj)
176 int a1, int a2, int b1, int b2, int *omi, int *omj)
177 {
177 {
178 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
178 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
179
179
180 for (i = a1; i < a2; i++) {
180 for (i = a1; i < a2; i++) {
181 /* skip things before the current block */
181 /* skip things before the current block */
182 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
182 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
183 ;
183 ;
184
184
185 /* loop through all lines match a[i] in b */
185 /* loop through all lines match a[i] in b */
186 for (; j != -1 && j < b2; j = b[j].n) {
186 for (; j != -1 && j < b2; j = b[j].n) {
187 /* does this extend an earlier match? */
187 /* does this extend an earlier match? */
188 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
188 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
189 k = pos[j - 1].len + 1;
189 k = pos[j - 1].len + 1;
190 else
190 else
191 k = 1;
191 k = 1;
192 pos[j].pos = i;
192 pos[j].pos = i;
193 pos[j].len = k;
193 pos[j].len = k;
194
194
195 /* best match so far? */
195 /* best match so far? */
196 if (k > mk) {
196 if (k > mk) {
197 mi = i;
197 mi = i;
198 mj = j;
198 mj = j;
199 mk = k;
199 mk = k;
200 }
200 }
201 }
201 }
202 }
202 }
203
203
204 if (mk) {
204 if (mk) {
205 mi = mi - mk + 1;
205 mi = mi - mk + 1;
206 mj = mj - mk + 1;
206 mj = mj - mk + 1;
207 }
207 }
208
208
209 /* expand match to include neighboring popular lines */
209 /* expand match to include neighboring popular lines */
210 while (mi - mb > a1 && mj - mb > b1 &&
210 while (mi - mb > a1 && mj - mb > b1 &&
211 a[mi - mb - 1].e == b[mj - mb - 1].e)
211 a[mi - mb - 1].e == b[mj - mb - 1].e)
212 mb++;
212 mb++;
213 while (mi + mk < a2 && mj + mk < b2 &&
213 while (mi + mk < a2 && mj + mk < b2 &&
214 a[mi + mk].e == b[mj + mk].e)
214 a[mi + mk].e == b[mj + mk].e)
215 mk++;
215 mk++;
216
216
217 *omi = mi - mb;
217 *omi = mi - mb;
218 *omj = mj - mb;
218 *omj = mj - mb;
219 return mk + mb;
219 return mk + mb;
220 }
220 }
221
221
222 static void recurse(struct line *a, struct line *b, struct pos *pos,
222 static void recurse(struct line *a, struct line *b, struct pos *pos,
223 int a1, int a2, int b1, int b2, struct hunklist *l)
223 int a1, int a2, int b1, int b2, struct hunklist *l)
224 {
224 {
225 int i, j, k;
225 int i, j, k;
226
226
227 /* find the longest match in this chunk */
227 /* find the longest match in this chunk */
228 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
228 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
229 if (!k)
229 if (!k)
230 return;
230 return;
231
231
232 /* and recurse on the remaining chunks on either side */
232 /* and recurse on the remaining chunks on either side */
233 recurse(a, b, pos, a1, i, b1, j, l);
233 recurse(a, b, pos, a1, i, b1, j, l);
234 l->head->a1 = i;
234 l->head->a1 = i;
235 l->head->a2 = i + k;
235 l->head->a2 = i + k;
236 l->head->b1 = j;
236 l->head->b1 = j;
237 l->head->b2 = j + k;
237 l->head->b2 = j + k;
238 l->head++;
238 l->head++;
239 recurse(a, b, pos, i + k, a2, j + k, b2, l);
239 recurse(a, b, pos, i + k, a2, j + k, b2, l);
240 }
240 }
241
241
242 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
242 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
243 {
243 {
244 struct hunklist l;
244 struct hunklist l;
245 struct pos *pos;
245 struct pos *pos;
246 int t;
246 int t;
247
247
248 /* allocate and fill arrays */
248 /* allocate and fill arrays */
249 t = equatelines(a, an, b, bn);
249 t = equatelines(a, an, b, bn);
250 pos = (struct pos *)calloc(bn, sizeof(struct pos));
250 pos = (struct pos *)calloc(bn, sizeof(struct pos));
251 /* we can't have more matches than lines in the shorter file */
251 /* we can't have more matches than lines in the shorter file */
252 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
252 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
253 ((an<bn ? an:bn) + 1));
253 ((an<bn ? an:bn) + 1));
254
254
255 if (pos && l.base && t) {
255 if (pos && l.base && t) {
256 /* generate the matching block list */
256 /* generate the matching block list */
257 recurse(a, b, pos, 0, an, 0, bn, &l);
257 recurse(a, b, pos, 0, an, 0, bn, &l);
258 l.head->a1 = an;
258 l.head->a1 = an;
259 l.head->b1 = bn;
259 l.head->b1 = bn;
260 l.head++;
260 l.head++;
261 }
261 }
262
262
263 free(pos);
263 free(pos);
264 return l;
264 return l;
265 }
265 }
266
266
267 static PyObject *blocks(PyObject *self, PyObject *args)
267 static PyObject *blocks(PyObject *self, PyObject *args)
268 {
268 {
269 PyObject *sa, *sb, *rl = NULL, *m;
269 PyObject *sa, *sb, *rl = NULL, *m;
270 struct line *a, *b;
270 struct line *a, *b;
271 struct hunklist l = {NULL, NULL};
271 struct hunklist l = {NULL, NULL};
272 struct hunk *h;
272 struct hunk *h;
273 int an, bn, pos = 0;
273 int an, bn, pos = 0;
274
274
275 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
275 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
276 return NULL;
276 return NULL;
277
277
278 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
278 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
279 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
279 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
280 if (!a || !b)
280 if (!a || !b)
281 goto nomem;
281 goto nomem;
282
282
283 l = diff(a, an, b, bn);
283 l = diff(a, an, b, bn);
284 rl = PyList_New(l.head - l.base);
284 rl = PyList_New(l.head - l.base);
285 if (!l.head || !rl)
285 if (!l.head || !rl)
286 goto nomem;
286 goto nomem;
287
287
288 for (h = l.base; h != l.head; h++) {
288 for (h = l.base; h != l.head; h++) {
289 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
289 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
290 PyList_SetItem(rl, pos, m);
290 PyList_SetItem(rl, pos, m);
291 pos++;
291 pos++;
292 }
292 }
293
293
294 nomem:
294 nomem:
295 free(a);
295 free(a);
296 free(b);
296 free(b);
297 free(l.base);
297 free(l.base);
298 return rl ? rl : PyErr_NoMemory();
298 return rl ? rl : PyErr_NoMemory();
299 }
299 }
300
300
301 static PyObject *bdiff(PyObject *self, PyObject *args)
301 static PyObject *bdiff(PyObject *self, PyObject *args)
302 {
302 {
303 PyObject *sa, *sb, *result = NULL;
303 char *sa, *sb;
304 PyObject *result = NULL;
304 struct line *al, *bl;
305 struct line *al, *bl;
305 struct hunklist l = {NULL, NULL};
306 struct hunklist l = {NULL, NULL};
306 struct hunk *h;
307 struct hunk *h;
307 char encode[12], *rb;
308 char encode[12], *rb;
308 int an, bn, len = 0, la = 0, lb = 0;
309 int an, bn, len = 0, la, lb;
309
310
310 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
311 if (!PyArg_ParseTuple(args, "t#t#:bdiff", &sa, &la, &sb, &lb))
311 return NULL;
312 return NULL;
312
313
313 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
314 an = splitlines(sa, la, &al);
314 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
315 bn = splitlines(sb, lb, &bl);
315 if (!al || !bl)
316 if (!al || !bl)
316 goto nomem;
317 goto nomem;
317
318
318 l = diff(al, an, bl, bn);
319 l = diff(al, an, bl, bn);
319 if (!l.head)
320 if (!l.head)
320 goto nomem;
321 goto nomem;
321
322
322 /* calculate length of output */
323 /* calculate length of output */
324 la = lb = 0;
323 for (h = l.base; h != l.head; h++) {
325 for (h = l.base; h != l.head; h++) {
324 if (h->a1 != la || h->b1 != lb)
326 if (h->a1 != la || h->b1 != lb)
325 len += 12 + bl[h->b1].l - bl[lb].l;
327 len += 12 + bl[h->b1].l - bl[lb].l;
326 la = h->a2;
328 la = h->a2;
327 lb = h->b2;
329 lb = h->b2;
328 }
330 }
329
331
330 result = PyString_FromStringAndSize(NULL, len);
332 result = PyString_FromStringAndSize(NULL, len);
331 if (!result)
333 if (!result)
332 goto nomem;
334 goto nomem;
333
335
334 /* build binary patch */
336 /* build binary patch */
335 rb = PyString_AsString(result);
337 rb = PyString_AsString(result);
336 la = lb = 0;
338 la = lb = 0;
337
339
338 for (h = l.base; h != l.head; h++) {
340 for (h = l.base; h != l.head; h++) {
339 if (h->a1 != la || h->b1 != lb) {
341 if (h->a1 != la || h->b1 != lb) {
340 len = bl[h->b1].l - bl[lb].l;
342 len = bl[h->b1].l - bl[lb].l;
341 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
343 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
342 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
344 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
343 *(uint32_t *)(encode + 8) = htonl(len);
345 *(uint32_t *)(encode + 8) = htonl(len);
344 memcpy(rb, encode, 12);
346 memcpy(rb, encode, 12);
345 memcpy(rb + 12, bl[lb].l, len);
347 memcpy(rb + 12, bl[lb].l, len);
346 rb += 12 + len;
348 rb += 12 + len;
347 }
349 }
348 la = h->a2;
350 la = h->a2;
349 lb = h->b2;
351 lb = h->b2;
350 }
352 }
351
353
352 nomem:
354 nomem:
353 free(al);
355 free(al);
354 free(bl);
356 free(bl);
355 free(l.base);
357 free(l.base);
356 return result ? result : PyErr_NoMemory();
358 return result ? result : PyErr_NoMemory();
357 }
359 }
358
360
359 static char mdiff_doc[] = "Efficient binary diff.";
361 static char mdiff_doc[] = "Efficient binary diff.";
360
362
361 static PyMethodDef methods[] = {
363 static PyMethodDef methods[] = {
362 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
364 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
363 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
365 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
364 {NULL, NULL}
366 {NULL, NULL}
365 };
367 };
366
368
367 PyMODINIT_FUNC initbdiff(void)
369 PyMODINIT_FUNC initbdiff(void)
368 {
370 {
369 Py_InitModule3("bdiff", methods, mdiff_doc);
371 Py_InitModule3("bdiff", methods, mdiff_doc);
370 }
372 }
371
373
@@ -1,1255 +1,1255
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import gettext as _
14 from i18n import gettext as _
15 from demandload import demandload
15 from demandload import demandload
16 demandload(globals(), "binascii changegroup errno ancestor mdiff os")
16 demandload(globals(), "binascii changegroup errno ancestor mdiff os")
17 demandload(globals(), "sha struct util zlib")
17 demandload(globals(), "sha struct util zlib")
18
18
19 # revlog version strings
19 # revlog version strings
20 REVLOGV0 = 0
20 REVLOGV0 = 0
21 REVLOGNG = 1
21 REVLOGNG = 1
22
22
23 # revlog flags
23 # revlog flags
24 REVLOGNGINLINEDATA = (1 << 16)
24 REVLOGNGINLINEDATA = (1 << 16)
25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26
26
27 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29
29
30 def flagstr(flag):
30 def flagstr(flag):
31 if flag == "inline":
31 if flag == "inline":
32 return REVLOGNGINLINEDATA
32 return REVLOGNGINLINEDATA
33 raise RevlogError(_("unknown revlog flag %s" % flag))
33 raise RevlogError(_("unknown revlog flag %s" % flag))
34
34
35 def hash(text, p1, p2):
35 def hash(text, p1, p2):
36 """generate a hash from the given text and its parent hashes
36 """generate a hash from the given text and its parent hashes
37
37
38 This hash combines both the current file contents and its history
38 This hash combines both the current file contents and its history
39 in a manner that makes it easy to distinguish nodes with the same
39 in a manner that makes it easy to distinguish nodes with the same
40 content in the revision graph.
40 content in the revision graph.
41 """
41 """
42 l = [p1, p2]
42 l = [p1, p2]
43 l.sort()
43 l.sort()
44 s = sha.new(l[0])
44 s = sha.new(l[0])
45 s.update(l[1])
45 s.update(l[1])
46 s.update(text)
46 s.update(text)
47 return s.digest()
47 return s.digest()
48
48
49 def compress(text):
49 def compress(text):
50 """ generate a possibly-compressed representation of text """
50 """ generate a possibly-compressed representation of text """
51 if not text: return ("", text)
51 if not text: return ("", text)
52 if len(text) < 44:
52 if len(text) < 44:
53 if text[0] == '\0': return ("", text)
53 if text[0] == '\0': return ("", text)
54 return ('u', text)
54 return ('u', text)
55 bin = zlib.compress(text)
55 bin = zlib.compress(text)
56 if len(bin) > len(text):
56 if len(bin) > len(text):
57 if text[0] == '\0': return ("", text)
57 if text[0] == '\0': return ("", text)
58 return ('u', text)
58 return ('u', text)
59 return ("", bin)
59 return ("", bin)
60
60
61 def decompress(bin):
61 def decompress(bin):
62 """ decompress the given input """
62 """ decompress the given input """
63 if not bin: return bin
63 if not bin: return bin
64 t = bin[0]
64 t = bin[0]
65 if t == '\0': return bin
65 if t == '\0': return bin
66 if t == 'x': return zlib.decompress(bin)
66 if t == 'x': return zlib.decompress(bin)
67 if t == 'u': return bin[1:]
67 if t == 'u': return bin[1:]
68 raise RevlogError(_("unknown compression type %r") % t)
68 raise RevlogError(_("unknown compression type %r") % t)
69
69
70 indexformatv0 = ">4l20s20s20s"
70 indexformatv0 = ">4l20s20s20s"
71 v0shaoffset = 56
71 v0shaoffset = 56
72 # index ng:
72 # index ng:
73 # 6 bytes offset
73 # 6 bytes offset
74 # 2 bytes flags
74 # 2 bytes flags
75 # 4 bytes compressed length
75 # 4 bytes compressed length
76 # 4 bytes uncompressed length
76 # 4 bytes uncompressed length
77 # 4 bytes: base rev
77 # 4 bytes: base rev
78 # 4 bytes link rev
78 # 4 bytes link rev
79 # 4 bytes parent 1 rev
79 # 4 bytes parent 1 rev
80 # 4 bytes parent 2 rev
80 # 4 bytes parent 2 rev
81 # 32 bytes: nodeid
81 # 32 bytes: nodeid
82 indexformatng = ">Qiiiiii20s12x"
82 indexformatng = ">Qiiiiii20s12x"
83 ngshaoffset = 32
83 ngshaoffset = 32
84 versionformat = ">i"
84 versionformat = ">i"
85
85
86 class lazyparser(object):
86 class lazyparser(object):
87 """
87 """
88 this class avoids the need to parse the entirety of large indices
88 this class avoids the need to parse the entirety of large indices
89 """
89 """
90
90
91 # lazyparser is not safe to use on windows if win32 extensions not
91 # lazyparser is not safe to use on windows if win32 extensions not
92 # available. it keeps file handle open, which make it not possible
92 # available. it keeps file handle open, which make it not possible
93 # to break hardlinks on local cloned repos.
93 # to break hardlinks on local cloned repos.
94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 hasattr(util, 'win32api'))
95 hasattr(util, 'win32api'))
96
96
97 def __init__(self, dataf, size, indexformat, shaoffset):
97 def __init__(self, dataf, size, indexformat, shaoffset):
98 self.dataf = dataf
98 self.dataf = dataf
99 self.format = indexformat
99 self.format = indexformat
100 self.s = struct.calcsize(indexformat)
100 self.s = struct.calcsize(indexformat)
101 self.indexformat = indexformat
101 self.indexformat = indexformat
102 self.datasize = size
102 self.datasize = size
103 self.l = size/self.s
103 self.l = size/self.s
104 self.index = [None] * self.l
104 self.index = [None] * self.l
105 self.map = {nullid: -1}
105 self.map = {nullid: -1}
106 self.allmap = 0
106 self.allmap = 0
107 self.all = 0
107 self.all = 0
108 self.mapfind_count = 0
108 self.mapfind_count = 0
109 self.shaoffset = shaoffset
109 self.shaoffset = shaoffset
110
110
111 def loadmap(self):
111 def loadmap(self):
112 """
112 """
113 during a commit, we need to make sure the rev being added is
113 during a commit, we need to make sure the rev being added is
114 not a duplicate. This requires loading the entire index,
114 not a duplicate. This requires loading the entire index,
115 which is fairly slow. loadmap can load up just the node map,
115 which is fairly slow. loadmap can load up just the node map,
116 which takes much less time.
116 which takes much less time.
117 """
117 """
118 if self.allmap: return
118 if self.allmap: return
119 end = self.datasize
119 end = self.datasize
120 self.allmap = 1
120 self.allmap = 1
121 cur = 0
121 cur = 0
122 count = 0
122 count = 0
123 blocksize = self.s * 256
123 blocksize = self.s * 256
124 self.dataf.seek(0)
124 self.dataf.seek(0)
125 while cur < end:
125 while cur < end:
126 data = self.dataf.read(blocksize)
126 data = self.dataf.read(blocksize)
127 off = 0
127 off = 0
128 for x in xrange(256):
128 for x in xrange(256):
129 n = data[off + self.shaoffset:off + self.shaoffset + 20]
129 n = data[off + self.shaoffset:off + self.shaoffset + 20]
130 self.map[n] = count
130 self.map[n] = count
131 count += 1
131 count += 1
132 if count >= self.l:
132 if count >= self.l:
133 break
133 break
134 off += self.s
134 off += self.s
135 cur += blocksize
135 cur += blocksize
136
136
137 def loadblock(self, blockstart, blocksize, data=None):
137 def loadblock(self, blockstart, blocksize, data=None):
138 if self.all: return
138 if self.all: return
139 if data is None:
139 if data is None:
140 self.dataf.seek(blockstart)
140 self.dataf.seek(blockstart)
141 if blockstart + blocksize > self.datasize:
141 if blockstart + blocksize > self.datasize:
142 # the revlog may have grown since we've started running,
142 # the revlog may have grown since we've started running,
143 # but we don't have space in self.index for more entries.
143 # but we don't have space in self.index for more entries.
144 # limit blocksize so that we don't get too much data.
144 # limit blocksize so that we don't get too much data.
145 blocksize = max(self.datasize - blockstart, 0)
145 blocksize = max(self.datasize - blockstart, 0)
146 data = self.dataf.read(blocksize)
146 data = self.dataf.read(blocksize)
147 lend = len(data) / self.s
147 lend = len(data) / self.s
148 i = blockstart / self.s
148 i = blockstart / self.s
149 off = 0
149 off = 0
150 for x in xrange(lend):
150 for x in xrange(lend):
151 if self.index[i + x] == None:
151 if self.index[i + x] == None:
152 b = data[off : off + self.s]
152 b = data[off : off + self.s]
153 self.index[i + x] = b
153 self.index[i + x] = b
154 n = b[self.shaoffset:self.shaoffset + 20]
154 n = b[self.shaoffset:self.shaoffset + 20]
155 self.map[n] = i + x
155 self.map[n] = i + x
156 off += self.s
156 off += self.s
157
157
158 def findnode(self, node):
158 def findnode(self, node):
159 """search backwards through the index file for a specific node"""
159 """search backwards through the index file for a specific node"""
160 if self.allmap: return None
160 if self.allmap: return None
161
161
162 # hg log will cause many many searches for the manifest
162 # hg log will cause many many searches for the manifest
163 # nodes. After we get called a few times, just load the whole
163 # nodes. After we get called a few times, just load the whole
164 # thing.
164 # thing.
165 if self.mapfind_count > 8:
165 if self.mapfind_count > 8:
166 self.loadmap()
166 self.loadmap()
167 if node in self.map:
167 if node in self.map:
168 return node
168 return node
169 return None
169 return None
170 self.mapfind_count += 1
170 self.mapfind_count += 1
171 last = self.l - 1
171 last = self.l - 1
172 while self.index[last] != None:
172 while self.index[last] != None:
173 if last == 0:
173 if last == 0:
174 self.all = 1
174 self.all = 1
175 self.allmap = 1
175 self.allmap = 1
176 return None
176 return None
177 last -= 1
177 last -= 1
178 end = (last + 1) * self.s
178 end = (last + 1) * self.s
179 blocksize = self.s * 256
179 blocksize = self.s * 256
180 while end >= 0:
180 while end >= 0:
181 start = max(end - blocksize, 0)
181 start = max(end - blocksize, 0)
182 self.dataf.seek(start)
182 self.dataf.seek(start)
183 data = self.dataf.read(end - start)
183 data = self.dataf.read(end - start)
184 findend = end - start
184 findend = end - start
185 while True:
185 while True:
186 # we're searching backwards, so weh have to make sure
186 # we're searching backwards, so weh have to make sure
187 # we don't find a changeset where this node is a parent
187 # we don't find a changeset where this node is a parent
188 off = data.rfind(node, 0, findend)
188 off = data.rfind(node, 0, findend)
189 findend = off
189 findend = off
190 if off >= 0:
190 if off >= 0:
191 i = off / self.s
191 i = off / self.s
192 off = i * self.s
192 off = i * self.s
193 n = data[off + self.shaoffset:off + self.shaoffset + 20]
193 n = data[off + self.shaoffset:off + self.shaoffset + 20]
194 if n == node:
194 if n == node:
195 self.map[n] = i + start / self.s
195 self.map[n] = i + start / self.s
196 return node
196 return node
197 else:
197 else:
198 break
198 break
199 end -= blocksize
199 end -= blocksize
200 return None
200 return None
201
201
202 def loadindex(self, i=None, end=None):
202 def loadindex(self, i=None, end=None):
203 if self.all: return
203 if self.all: return
204 all = False
204 all = False
205 if i == None:
205 if i == None:
206 blockstart = 0
206 blockstart = 0
207 blocksize = (512 / self.s) * self.s
207 blocksize = (512 / self.s) * self.s
208 end = self.datasize
208 end = self.datasize
209 all = True
209 all = True
210 else:
210 else:
211 if end:
211 if end:
212 blockstart = i * self.s
212 blockstart = i * self.s
213 end = end * self.s
213 end = end * self.s
214 blocksize = end - blockstart
214 blocksize = end - blockstart
215 else:
215 else:
216 blockstart = (i & ~(32)) * self.s
216 blockstart = (i & ~(32)) * self.s
217 blocksize = self.s * 64
217 blocksize = self.s * 64
218 end = blockstart + blocksize
218 end = blockstart + blocksize
219 while blockstart < end:
219 while blockstart < end:
220 self.loadblock(blockstart, blocksize)
220 self.loadblock(blockstart, blocksize)
221 blockstart += blocksize
221 blockstart += blocksize
222 if all: self.all = True
222 if all: self.all = True
223
223
224 class lazyindex(object):
224 class lazyindex(object):
225 """a lazy version of the index array"""
225 """a lazy version of the index array"""
226 def __init__(self, parser):
226 def __init__(self, parser):
227 self.p = parser
227 self.p = parser
228 def __len__(self):
228 def __len__(self):
229 return len(self.p.index)
229 return len(self.p.index)
230 def load(self, pos):
230 def load(self, pos):
231 if pos < 0:
231 if pos < 0:
232 pos += len(self.p.index)
232 pos += len(self.p.index)
233 self.p.loadindex(pos)
233 self.p.loadindex(pos)
234 return self.p.index[pos]
234 return self.p.index[pos]
235 def __getitem__(self, pos):
235 def __getitem__(self, pos):
236 ret = self.p.index[pos] or self.load(pos)
236 ret = self.p.index[pos] or self.load(pos)
237 if isinstance(ret, str):
237 if isinstance(ret, str):
238 ret = struct.unpack(self.p.indexformat, ret)
238 ret = struct.unpack(self.p.indexformat, ret)
239 return ret
239 return ret
240 def __setitem__(self, pos, item):
240 def __setitem__(self, pos, item):
241 self.p.index[pos] = item
241 self.p.index[pos] = item
242 def __delitem__(self, pos):
242 def __delitem__(self, pos):
243 del self.p.index[pos]
243 del self.p.index[pos]
244 def append(self, e):
244 def append(self, e):
245 self.p.index.append(e)
245 self.p.index.append(e)
246
246
247 class lazymap(object):
247 class lazymap(object):
248 """a lazy version of the node map"""
248 """a lazy version of the node map"""
249 def __init__(self, parser):
249 def __init__(self, parser):
250 self.p = parser
250 self.p = parser
251 def load(self, key):
251 def load(self, key):
252 n = self.p.findnode(key)
252 n = self.p.findnode(key)
253 if n == None:
253 if n == None:
254 raise KeyError(key)
254 raise KeyError(key)
255 def __contains__(self, key):
255 def __contains__(self, key):
256 if key in self.p.map:
256 if key in self.p.map:
257 return True
257 return True
258 self.p.loadmap()
258 self.p.loadmap()
259 return key in self.p.map
259 return key in self.p.map
260 def __iter__(self):
260 def __iter__(self):
261 yield nullid
261 yield nullid
262 for i in xrange(self.p.l):
262 for i in xrange(self.p.l):
263 ret = self.p.index[i]
263 ret = self.p.index[i]
264 if not ret:
264 if not ret:
265 self.p.loadindex(i)
265 self.p.loadindex(i)
266 ret = self.p.index[i]
266 ret = self.p.index[i]
267 if isinstance(ret, str):
267 if isinstance(ret, str):
268 ret = struct.unpack(self.p.indexformat, ret)
268 ret = struct.unpack(self.p.indexformat, ret)
269 yield ret[-1]
269 yield ret[-1]
270 def __getitem__(self, key):
270 def __getitem__(self, key):
271 try:
271 try:
272 return self.p.map[key]
272 return self.p.map[key]
273 except KeyError:
273 except KeyError:
274 try:
274 try:
275 self.load(key)
275 self.load(key)
276 return self.p.map[key]
276 return self.p.map[key]
277 except KeyError:
277 except KeyError:
278 raise KeyError("node " + hex(key))
278 raise KeyError("node " + hex(key))
279 def __setitem__(self, key, val):
279 def __setitem__(self, key, val):
280 self.p.map[key] = val
280 self.p.map[key] = val
281 def __delitem__(self, key):
281 def __delitem__(self, key):
282 del self.p.map[key]
282 del self.p.map[key]
283
283
284 class RevlogError(Exception): pass
284 class RevlogError(Exception): pass
285
285
286 class revlog(object):
286 class revlog(object):
287 """
287 """
288 the underlying revision storage object
288 the underlying revision storage object
289
289
290 A revlog consists of two parts, an index and the revision data.
290 A revlog consists of two parts, an index and the revision data.
291
291
292 The index is a file with a fixed record size containing
292 The index is a file with a fixed record size containing
293 information on each revision, includings its nodeid (hash), the
293 information on each revision, includings its nodeid (hash), the
294 nodeids of its parents, the position and offset of its data within
294 nodeids of its parents, the position and offset of its data within
295 the data file, and the revision it's based on. Finally, each entry
295 the data file, and the revision it's based on. Finally, each entry
296 contains a linkrev entry that can serve as a pointer to external
296 contains a linkrev entry that can serve as a pointer to external
297 data.
297 data.
298
298
299 The revision data itself is a linear collection of data chunks.
299 The revision data itself is a linear collection of data chunks.
300 Each chunk represents a revision and is usually represented as a
300 Each chunk represents a revision and is usually represented as a
301 delta against the previous chunk. To bound lookup time, runs of
301 delta against the previous chunk. To bound lookup time, runs of
302 deltas are limited to about 2 times the length of the original
302 deltas are limited to about 2 times the length of the original
303 version data. This makes retrieval of a version proportional to
303 version data. This makes retrieval of a version proportional to
304 its size, or O(1) relative to the number of revisions.
304 its size, or O(1) relative to the number of revisions.
305
305
306 Both pieces of the revlog are written to in an append-only
306 Both pieces of the revlog are written to in an append-only
307 fashion, which means we never need to rewrite a file to insert or
307 fashion, which means we never need to rewrite a file to insert or
308 remove data, and can use some simple techniques to avoid the need
308 remove data, and can use some simple techniques to avoid the need
309 for locking while reading.
309 for locking while reading.
310 """
310 """
311 def __init__(self, opener, indexfile, datafile,
311 def __init__(self, opener, indexfile, datafile,
312 defversion=REVLOG_DEFAULT_VERSION):
312 defversion=REVLOG_DEFAULT_VERSION):
313 """
313 """
314 create a revlog object
314 create a revlog object
315
315
316 opener is a function that abstracts the file opening operation
316 opener is a function that abstracts the file opening operation
317 and can be used to implement COW semantics or the like.
317 and can be used to implement COW semantics or the like.
318 """
318 """
319 self.indexfile = indexfile
319 self.indexfile = indexfile
320 self.datafile = datafile
320 self.datafile = datafile
321 self.opener = opener
321 self.opener = opener
322
322
323 self.indexstat = None
323 self.indexstat = None
324 self.cache = None
324 self.cache = None
325 self.chunkcache = None
325 self.chunkcache = None
326 self.defversion = defversion
326 self.defversion = defversion
327 self.load()
327 self.load()
328
328
329 def load(self):
329 def load(self):
330 v = self.defversion
330 v = self.defversion
331 try:
331 try:
332 f = self.opener(self.indexfile)
332 f = self.opener(self.indexfile)
333 i = f.read(4)
333 i = f.read(4)
334 f.seek(0)
334 f.seek(0)
335 except IOError, inst:
335 except IOError, inst:
336 if inst.errno != errno.ENOENT:
336 if inst.errno != errno.ENOENT:
337 raise
337 raise
338 i = ""
338 i = ""
339 else:
339 else:
340 try:
340 try:
341 st = util.fstat(f)
341 st = util.fstat(f)
342 except AttributeError, inst:
342 except AttributeError, inst:
343 st = None
343 st = None
344 else:
344 else:
345 oldst = self.indexstat
345 oldst = self.indexstat
346 if (oldst and st.st_dev == oldst.st_dev
346 if (oldst and st.st_dev == oldst.st_dev
347 and st.st_ino == oldst.st_ino
347 and st.st_ino == oldst.st_ino
348 and st.st_mtime == oldst.st_mtime
348 and st.st_mtime == oldst.st_mtime
349 and st.st_ctime == oldst.st_ctime):
349 and st.st_ctime == oldst.st_ctime):
350 return
350 return
351 self.indexstat = st
351 self.indexstat = st
352 if len(i) > 0:
352 if len(i) > 0:
353 v = struct.unpack(versionformat, i)[0]
353 v = struct.unpack(versionformat, i)[0]
354 flags = v & ~0xFFFF
354 flags = v & ~0xFFFF
355 fmt = v & 0xFFFF
355 fmt = v & 0xFFFF
356 if fmt == REVLOGV0:
356 if fmt == REVLOGV0:
357 if flags:
357 if flags:
358 raise RevlogError(_("index %s invalid flags %x for format v0" %
358 raise RevlogError(_("index %s invalid flags %x for format v0" %
359 (self.indexfile, flags)))
359 (self.indexfile, flags)))
360 elif fmt == REVLOGNG:
360 elif fmt == REVLOGNG:
361 if flags & ~REVLOGNGINLINEDATA:
361 if flags & ~REVLOGNGINLINEDATA:
362 raise RevlogError(_("index %s invalid flags %x for revlogng" %
362 raise RevlogError(_("index %s invalid flags %x for revlogng" %
363 (self.indexfile, flags)))
363 (self.indexfile, flags)))
364 else:
364 else:
365 raise RevlogError(_("index %s invalid format %d" %
365 raise RevlogError(_("index %s invalid format %d" %
366 (self.indexfile, fmt)))
366 (self.indexfile, fmt)))
367 self.version = v
367 self.version = v
368 if v == REVLOGV0:
368 if v == REVLOGV0:
369 self.indexformat = indexformatv0
369 self.indexformat = indexformatv0
370 shaoffset = v0shaoffset
370 shaoffset = v0shaoffset
371 else:
371 else:
372 self.indexformat = indexformatng
372 self.indexformat = indexformatng
373 shaoffset = ngshaoffset
373 shaoffset = ngshaoffset
374
374
375 if i:
375 if i:
376 if (lazyparser.safe_to_use and not self.inlinedata() and
376 if (lazyparser.safe_to_use and not self.inlinedata() and
377 st and st.st_size > 10000):
377 st and st.st_size > 10000):
378 # big index, let's parse it on demand
378 # big index, let's parse it on demand
379 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
379 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
380 self.index = lazyindex(parser)
380 self.index = lazyindex(parser)
381 self.nodemap = lazymap(parser)
381 self.nodemap = lazymap(parser)
382 else:
382 else:
383 self.parseindex(f, st)
383 self.parseindex(f, st)
384 if self.version != REVLOGV0:
384 if self.version != REVLOGV0:
385 e = list(self.index[0])
385 e = list(self.index[0])
386 type = self.ngtype(e[0])
386 type = self.ngtype(e[0])
387 e[0] = self.offset_type(0, type)
387 e[0] = self.offset_type(0, type)
388 self.index[0] = e
388 self.index[0] = e
389 else:
389 else:
390 self.nodemap = { nullid: -1}
390 self.nodemap = { nullid: -1}
391 self.index = []
391 self.index = []
392
392
393
393
394 def parseindex(self, fp, st):
394 def parseindex(self, fp, st):
395 s = struct.calcsize(self.indexformat)
395 s = struct.calcsize(self.indexformat)
396 self.index = []
396 self.index = []
397 self.nodemap = {nullid: -1}
397 self.nodemap = {nullid: -1}
398 inline = self.inlinedata()
398 inline = self.inlinedata()
399 n = 0
399 n = 0
400 leftover = None
400 leftover = None
401 while True:
401 while True:
402 if st:
402 if st:
403 data = fp.read(65536)
403 data = fp.read(65536)
404 else:
404 else:
405 # hack for httprangereader, it doesn't do partial reads well
405 # hack for httprangereader, it doesn't do partial reads well
406 data = fp.read()
406 data = fp.read()
407 if not data:
407 if not data:
408 break
408 break
409 if n == 0 and self.inlinedata():
409 if n == 0 and self.inlinedata():
410 # cache the first chunk
410 # cache the first chunk
411 self.chunkcache = (0, data)
411 self.chunkcache = (0, data)
412 if leftover:
412 if leftover:
413 data = leftover + data
413 data = leftover + data
414 leftover = None
414 leftover = None
415 off = 0
415 off = 0
416 l = len(data)
416 l = len(data)
417 while off < l:
417 while off < l:
418 if l - off < s:
418 if l - off < s:
419 leftover = data[off:]
419 leftover = data[off:]
420 break
420 break
421 cur = data[off:off + s]
421 cur = data[off:off + s]
422 off += s
422 off += s
423 e = struct.unpack(self.indexformat, cur)
423 e = struct.unpack(self.indexformat, cur)
424 self.index.append(e)
424 self.index.append(e)
425 self.nodemap[e[-1]] = n
425 self.nodemap[e[-1]] = n
426 n += 1
426 n += 1
427 if inline:
427 if inline:
428 off += e[1]
428 off += e[1]
429 if off > l:
429 if off > l:
430 # some things don't seek well, just read it
430 # some things don't seek well, just read it
431 fp.read(off - l)
431 fp.read(off - l)
432 if not st:
432 if not st:
433 break
433 break
434
434
435
435
436 def ngoffset(self, q):
436 def ngoffset(self, q):
437 if q & 0xFFFF:
437 if q & 0xFFFF:
438 raise RevlogError(_('%s: incompatible revision flag %x') %
438 raise RevlogError(_('%s: incompatible revision flag %x') %
439 (self.indexfile, q))
439 (self.indexfile, q))
440 return long(q >> 16)
440 return long(q >> 16)
441
441
442 def ngtype(self, q):
442 def ngtype(self, q):
443 return int(q & 0xFFFF)
443 return int(q & 0xFFFF)
444
444
445 def offset_type(self, offset, type):
445 def offset_type(self, offset, type):
446 return long(long(offset) << 16 | type)
446 return long(long(offset) << 16 | type)
447
447
448 def loadindex(self, start, end):
448 def loadindex(self, start, end):
449 """load a block of indexes all at once from the lazy parser"""
449 """load a block of indexes all at once from the lazy parser"""
450 if isinstance(self.index, lazyindex):
450 if isinstance(self.index, lazyindex):
451 self.index.p.loadindex(start, end)
451 self.index.p.loadindex(start, end)
452
452
453 def loadindexmap(self):
453 def loadindexmap(self):
454 """loads both the map and the index from the lazy parser"""
454 """loads both the map and the index from the lazy parser"""
455 if isinstance(self.index, lazyindex):
455 if isinstance(self.index, lazyindex):
456 p = self.index.p
456 p = self.index.p
457 p.loadindex()
457 p.loadindex()
458 self.nodemap = p.map
458 self.nodemap = p.map
459
459
460 def loadmap(self):
460 def loadmap(self):
461 """loads the map from the lazy parser"""
461 """loads the map from the lazy parser"""
462 if isinstance(self.nodemap, lazymap):
462 if isinstance(self.nodemap, lazymap):
463 self.nodemap.p.loadmap()
463 self.nodemap.p.loadmap()
464 self.nodemap = self.nodemap.p.map
464 self.nodemap = self.nodemap.p.map
465
465
466 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
466 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
467 def tip(self): return self.node(len(self.index) - 1)
467 def tip(self): return self.node(len(self.index) - 1)
468 def count(self): return len(self.index)
468 def count(self): return len(self.index)
469 def node(self, rev):
469 def node(self, rev):
470 return (rev < 0) and nullid or self.index[rev][-1]
470 return (rev < 0) and nullid or self.index[rev][-1]
471 def rev(self, node):
471 def rev(self, node):
472 try:
472 try:
473 return self.nodemap[node]
473 return self.nodemap[node]
474 except KeyError:
474 except KeyError:
475 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
475 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
476 def linkrev(self, node):
476 def linkrev(self, node):
477 return (node == nullid) and -1 or self.index[self.rev(node)][-4]
477 return (node == nullid) and -1 or self.index[self.rev(node)][-4]
478 def parents(self, node):
478 def parents(self, node):
479 if node == nullid: return (nullid, nullid)
479 if node == nullid: return (nullid, nullid)
480 r = self.rev(node)
480 r = self.rev(node)
481 d = self.index[r][-3:-1]
481 d = self.index[r][-3:-1]
482 if self.version == REVLOGV0:
482 if self.version == REVLOGV0:
483 return d
483 return d
484 return [ self.node(x) for x in d ]
484 return [ self.node(x) for x in d ]
485 def parentrevs(self, rev):
485 def parentrevs(self, rev):
486 if rev == -1:
486 if rev == -1:
487 return (-1, -1)
487 return (-1, -1)
488 d = self.index[rev][-3:-1]
488 d = self.index[rev][-3:-1]
489 if self.version == REVLOGV0:
489 if self.version == REVLOGV0:
490 return [ self.rev(x) for x in d ]
490 return [ self.rev(x) for x in d ]
491 return d
491 return d
492 def start(self, rev):
492 def start(self, rev):
493 if rev < 0:
493 if rev < 0:
494 return -1
494 return -1
495 if self.version != REVLOGV0:
495 if self.version != REVLOGV0:
496 return self.ngoffset(self.index[rev][0])
496 return self.ngoffset(self.index[rev][0])
497 return self.index[rev][0]
497 return self.index[rev][0]
498
498
499 def end(self, rev): return self.start(rev) + self.length(rev)
499 def end(self, rev): return self.start(rev) + self.length(rev)
500
500
501 def size(self, rev):
501 def size(self, rev):
502 """return the length of the uncompressed text for a given revision"""
502 """return the length of the uncompressed text for a given revision"""
503 l = -1
503 l = -1
504 if self.version != REVLOGV0:
504 if self.version != REVLOGV0:
505 l = self.index[rev][2]
505 l = self.index[rev][2]
506 if l >= 0:
506 if l >= 0:
507 return l
507 return l
508
508
509 t = self.revision(self.node(rev))
509 t = self.revision(self.node(rev))
510 return len(t)
510 return len(t)
511
511
512 # alternate implementation, The advantage to this code is it
512 # alternate implementation, The advantage to this code is it
513 # will be faster for a single revision. But, the results are not
513 # will be faster for a single revision. But, the results are not
514 # cached, so finding the size of every revision will be slower.
514 # cached, so finding the size of every revision will be slower.
515 """
515 """
516 if self.cache and self.cache[1] == rev:
516 if self.cache and self.cache[1] == rev:
517 return len(self.cache[2])
517 return len(self.cache[2])
518
518
519 base = self.base(rev)
519 base = self.base(rev)
520 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
520 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
521 base = self.cache[1]
521 base = self.cache[1]
522 text = self.cache[2]
522 text = self.cache[2]
523 else:
523 else:
524 text = self.revision(self.node(base))
524 text = self.revision(self.node(base))
525
525
526 l = len(text)
526 l = len(text)
527 for x in xrange(base + 1, rev + 1):
527 for x in xrange(base + 1, rev + 1):
528 l = mdiff.patchedsize(l, self.chunk(x))
528 l = mdiff.patchedsize(l, self.chunk(x))
529 return l
529 return l
530 """
530 """
531
531
532 def length(self, rev):
532 def length(self, rev):
533 if rev < 0:
533 if rev < 0:
534 return 0
534 return 0
535 else:
535 else:
536 return self.index[rev][1]
536 return self.index[rev][1]
537 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
537 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
538
538
539 def reachable(self, rev, stop=None):
539 def reachable(self, rev, stop=None):
540 reachable = {}
540 reachable = {}
541 visit = [rev]
541 visit = [rev]
542 reachable[rev] = 1
542 reachable[rev] = 1
543 if stop:
543 if stop:
544 stopn = self.rev(stop)
544 stopn = self.rev(stop)
545 else:
545 else:
546 stopn = 0
546 stopn = 0
547 while visit:
547 while visit:
548 n = visit.pop(0)
548 n = visit.pop(0)
549 if n == stop:
549 if n == stop:
550 continue
550 continue
551 if n == nullid:
551 if n == nullid:
552 continue
552 continue
553 for p in self.parents(n):
553 for p in self.parents(n):
554 if self.rev(p) < stopn:
554 if self.rev(p) < stopn:
555 continue
555 continue
556 if p not in reachable:
556 if p not in reachable:
557 reachable[p] = 1
557 reachable[p] = 1
558 visit.append(p)
558 visit.append(p)
559 return reachable
559 return reachable
560
560
561 def nodesbetween(self, roots=None, heads=None):
561 def nodesbetween(self, roots=None, heads=None):
562 """Return a tuple containing three elements. Elements 1 and 2 contain
562 """Return a tuple containing three elements. Elements 1 and 2 contain
563 a final list bases and heads after all the unreachable ones have been
563 a final list bases and heads after all the unreachable ones have been
564 pruned. Element 0 contains a topologically sorted list of all
564 pruned. Element 0 contains a topologically sorted list of all
565
565
566 nodes that satisfy these constraints:
566 nodes that satisfy these constraints:
567 1. All nodes must be descended from a node in roots (the nodes on
567 1. All nodes must be descended from a node in roots (the nodes on
568 roots are considered descended from themselves).
568 roots are considered descended from themselves).
569 2. All nodes must also be ancestors of a node in heads (the nodes in
569 2. All nodes must also be ancestors of a node in heads (the nodes in
570 heads are considered to be their own ancestors).
570 heads are considered to be their own ancestors).
571
571
572 If roots is unspecified, nullid is assumed as the only root.
572 If roots is unspecified, nullid is assumed as the only root.
573 If heads is unspecified, it is taken to be the output of the
573 If heads is unspecified, it is taken to be the output of the
574 heads method (i.e. a list of all nodes in the repository that
574 heads method (i.e. a list of all nodes in the repository that
575 have no children)."""
575 have no children)."""
576 nonodes = ([], [], [])
576 nonodes = ([], [], [])
577 if roots is not None:
577 if roots is not None:
578 roots = list(roots)
578 roots = list(roots)
579 if not roots:
579 if not roots:
580 return nonodes
580 return nonodes
581 lowestrev = min([self.rev(n) for n in roots])
581 lowestrev = min([self.rev(n) for n in roots])
582 else:
582 else:
583 roots = [nullid] # Everybody's a descendent of nullid
583 roots = [nullid] # Everybody's a descendent of nullid
584 lowestrev = -1
584 lowestrev = -1
585 if (lowestrev == -1) and (heads is None):
585 if (lowestrev == -1) and (heads is None):
586 # We want _all_ the nodes!
586 # We want _all_ the nodes!
587 return ([self.node(r) for r in xrange(0, self.count())],
587 return ([self.node(r) for r in xrange(0, self.count())],
588 [nullid], list(self.heads()))
588 [nullid], list(self.heads()))
589 if heads is None:
589 if heads is None:
590 # All nodes are ancestors, so the latest ancestor is the last
590 # All nodes are ancestors, so the latest ancestor is the last
591 # node.
591 # node.
592 highestrev = self.count() - 1
592 highestrev = self.count() - 1
593 # Set ancestors to None to signal that every node is an ancestor.
593 # Set ancestors to None to signal that every node is an ancestor.
594 ancestors = None
594 ancestors = None
595 # Set heads to an empty dictionary for later discovery of heads
595 # Set heads to an empty dictionary for later discovery of heads
596 heads = {}
596 heads = {}
597 else:
597 else:
598 heads = list(heads)
598 heads = list(heads)
599 if not heads:
599 if not heads:
600 return nonodes
600 return nonodes
601 ancestors = {}
601 ancestors = {}
602 # Start at the top and keep marking parents until we're done.
602 # Start at the top and keep marking parents until we're done.
603 nodestotag = heads[:]
603 nodestotag = heads[:]
604 # Turn heads into a dictionary so we can remove 'fake' heads.
604 # Turn heads into a dictionary so we can remove 'fake' heads.
605 # Also, later we will be using it to filter out the heads we can't
605 # Also, later we will be using it to filter out the heads we can't
606 # find from roots.
606 # find from roots.
607 heads = dict.fromkeys(heads, 0)
607 heads = dict.fromkeys(heads, 0)
608 # Remember where the top was so we can use it as a limit later.
608 # Remember where the top was so we can use it as a limit later.
609 highestrev = max([self.rev(n) for n in nodestotag])
609 highestrev = max([self.rev(n) for n in nodestotag])
610 while nodestotag:
610 while nodestotag:
611 # grab a node to tag
611 # grab a node to tag
612 n = nodestotag.pop()
612 n = nodestotag.pop()
613 # Never tag nullid
613 # Never tag nullid
614 if n == nullid:
614 if n == nullid:
615 continue
615 continue
616 # A node's revision number represents its place in a
616 # A node's revision number represents its place in a
617 # topologically sorted list of nodes.
617 # topologically sorted list of nodes.
618 r = self.rev(n)
618 r = self.rev(n)
619 if r >= lowestrev:
619 if r >= lowestrev:
620 if n not in ancestors:
620 if n not in ancestors:
621 # If we are possibly a descendent of one of the roots
621 # If we are possibly a descendent of one of the roots
622 # and we haven't already been marked as an ancestor
622 # and we haven't already been marked as an ancestor
623 ancestors[n] = 1 # Mark as ancestor
623 ancestors[n] = 1 # Mark as ancestor
624 # Add non-nullid parents to list of nodes to tag.
624 # Add non-nullid parents to list of nodes to tag.
625 nodestotag.extend([p for p in self.parents(n) if
625 nodestotag.extend([p for p in self.parents(n) if
626 p != nullid])
626 p != nullid])
627 elif n in heads: # We've seen it before, is it a fake head?
627 elif n in heads: # We've seen it before, is it a fake head?
628 # So it is, real heads should not be the ancestors of
628 # So it is, real heads should not be the ancestors of
629 # any other heads.
629 # any other heads.
630 heads.pop(n)
630 heads.pop(n)
631 if not ancestors:
631 if not ancestors:
632 return nonodes
632 return nonodes
633 # Now that we have our set of ancestors, we want to remove any
633 # Now that we have our set of ancestors, we want to remove any
634 # roots that are not ancestors.
634 # roots that are not ancestors.
635
635
636 # If one of the roots was nullid, everything is included anyway.
636 # If one of the roots was nullid, everything is included anyway.
637 if lowestrev > -1:
637 if lowestrev > -1:
638 # But, since we weren't, let's recompute the lowest rev to not
638 # But, since we weren't, let's recompute the lowest rev to not
639 # include roots that aren't ancestors.
639 # include roots that aren't ancestors.
640
640
641 # Filter out roots that aren't ancestors of heads
641 # Filter out roots that aren't ancestors of heads
642 roots = [n for n in roots if n in ancestors]
642 roots = [n for n in roots if n in ancestors]
643 # Recompute the lowest revision
643 # Recompute the lowest revision
644 if roots:
644 if roots:
645 lowestrev = min([self.rev(n) for n in roots])
645 lowestrev = min([self.rev(n) for n in roots])
646 else:
646 else:
647 # No more roots? Return empty list
647 # No more roots? Return empty list
648 return nonodes
648 return nonodes
649 else:
649 else:
650 # We are descending from nullid, and don't need to care about
650 # We are descending from nullid, and don't need to care about
651 # any other roots.
651 # any other roots.
652 lowestrev = -1
652 lowestrev = -1
653 roots = [nullid]
653 roots = [nullid]
654 # Transform our roots list into a 'set' (i.e. a dictionary where the
654 # Transform our roots list into a 'set' (i.e. a dictionary where the
655 # values don't matter.
655 # values don't matter.
656 descendents = dict.fromkeys(roots, 1)
656 descendents = dict.fromkeys(roots, 1)
657 # Also, keep the original roots so we can filter out roots that aren't
657 # Also, keep the original roots so we can filter out roots that aren't
658 # 'real' roots (i.e. are descended from other roots).
658 # 'real' roots (i.e. are descended from other roots).
659 roots = descendents.copy()
659 roots = descendents.copy()
660 # Our topologically sorted list of output nodes.
660 # Our topologically sorted list of output nodes.
661 orderedout = []
661 orderedout = []
662 # Don't start at nullid since we don't want nullid in our output list,
662 # Don't start at nullid since we don't want nullid in our output list,
663 # and if nullid shows up in descedents, empty parents will look like
663 # and if nullid shows up in descedents, empty parents will look like
664 # they're descendents.
664 # they're descendents.
665 for r in xrange(max(lowestrev, 0), highestrev + 1):
665 for r in xrange(max(lowestrev, 0), highestrev + 1):
666 n = self.node(r)
666 n = self.node(r)
667 isdescendent = False
667 isdescendent = False
668 if lowestrev == -1: # Everybody is a descendent of nullid
668 if lowestrev == -1: # Everybody is a descendent of nullid
669 isdescendent = True
669 isdescendent = True
670 elif n in descendents:
670 elif n in descendents:
671 # n is already a descendent
671 # n is already a descendent
672 isdescendent = True
672 isdescendent = True
673 # This check only needs to be done here because all the roots
673 # This check only needs to be done here because all the roots
674 # will start being marked is descendents before the loop.
674 # will start being marked is descendents before the loop.
675 if n in roots:
675 if n in roots:
676 # If n was a root, check if it's a 'real' root.
676 # If n was a root, check if it's a 'real' root.
677 p = tuple(self.parents(n))
677 p = tuple(self.parents(n))
678 # If any of its parents are descendents, it's not a root.
678 # If any of its parents are descendents, it's not a root.
679 if (p[0] in descendents) or (p[1] in descendents):
679 if (p[0] in descendents) or (p[1] in descendents):
680 roots.pop(n)
680 roots.pop(n)
681 else:
681 else:
682 p = tuple(self.parents(n))
682 p = tuple(self.parents(n))
683 # A node is a descendent if either of its parents are
683 # A node is a descendent if either of its parents are
684 # descendents. (We seeded the dependents list with the roots
684 # descendents. (We seeded the dependents list with the roots
685 # up there, remember?)
685 # up there, remember?)
686 if (p[0] in descendents) or (p[1] in descendents):
686 if (p[0] in descendents) or (p[1] in descendents):
687 descendents[n] = 1
687 descendents[n] = 1
688 isdescendent = True
688 isdescendent = True
689 if isdescendent and ((ancestors is None) or (n in ancestors)):
689 if isdescendent and ((ancestors is None) or (n in ancestors)):
690 # Only include nodes that are both descendents and ancestors.
690 # Only include nodes that are both descendents and ancestors.
691 orderedout.append(n)
691 orderedout.append(n)
692 if (ancestors is not None) and (n in heads):
692 if (ancestors is not None) and (n in heads):
693 # We're trying to figure out which heads are reachable
693 # We're trying to figure out which heads are reachable
694 # from roots.
694 # from roots.
695 # Mark this head as having been reached
695 # Mark this head as having been reached
696 heads[n] = 1
696 heads[n] = 1
697 elif ancestors is None:
697 elif ancestors is None:
698 # Otherwise, we're trying to discover the heads.
698 # Otherwise, we're trying to discover the heads.
699 # Assume this is a head because if it isn't, the next step
699 # Assume this is a head because if it isn't, the next step
700 # will eventually remove it.
700 # will eventually remove it.
701 heads[n] = 1
701 heads[n] = 1
702 # But, obviously its parents aren't.
702 # But, obviously its parents aren't.
703 for p in self.parents(n):
703 for p in self.parents(n):
704 heads.pop(p, None)
704 heads.pop(p, None)
705 heads = [n for n in heads.iterkeys() if heads[n] != 0]
705 heads = [n for n in heads.iterkeys() if heads[n] != 0]
706 roots = roots.keys()
706 roots = roots.keys()
707 assert orderedout
707 assert orderedout
708 assert roots
708 assert roots
709 assert heads
709 assert heads
710 return (orderedout, roots, heads)
710 return (orderedout, roots, heads)
711
711
712 def heads(self, start=None):
712 def heads(self, start=None):
713 """return the list of all nodes that have no children
713 """return the list of all nodes that have no children
714
714
715 if start is specified, only heads that are descendants of
715 if start is specified, only heads that are descendants of
716 start will be returned
716 start will be returned
717
717
718 """
718 """
719 if start is None:
719 if start is None:
720 start = nullid
720 start = nullid
721 startrev = self.rev(start)
721 startrev = self.rev(start)
722 reachable = {startrev: 1}
722 reachable = {startrev: 1}
723 heads = {startrev: 1}
723 heads = {startrev: 1}
724
724
725 parentrevs = self.parentrevs
725 parentrevs = self.parentrevs
726 for r in xrange(startrev + 1, self.count()):
726 for r in xrange(startrev + 1, self.count()):
727 for p in parentrevs(r):
727 for p in parentrevs(r):
728 if p in reachable:
728 if p in reachable:
729 reachable[r] = 1
729 reachable[r] = 1
730 heads[r] = 1
730 heads[r] = 1
731 if p in heads:
731 if p in heads:
732 del heads[p]
732 del heads[p]
733 return [self.node(r) for r in heads]
733 return [self.node(r) for r in heads]
734
734
735 def children(self, node):
735 def children(self, node):
736 """find the children of a given node"""
736 """find the children of a given node"""
737 c = []
737 c = []
738 p = self.rev(node)
738 p = self.rev(node)
739 for r in range(p + 1, self.count()):
739 for r in range(p + 1, self.count()):
740 n = self.node(r)
740 n = self.node(r)
741 for pn in self.parents(n):
741 for pn in self.parents(n):
742 if pn == node:
742 if pn == node:
743 c.append(n)
743 c.append(n)
744 continue
744 continue
745 elif pn == nullid:
745 elif pn == nullid:
746 continue
746 continue
747 return c
747 return c
748
748
749 def lookup(self, id):
749 def lookup(self, id):
750 """locate a node based on:
750 """locate a node based on:
751 - revision number or str(revision number)
751 - revision number or str(revision number)
752 - nodeid or subset of hex nodeid
752 - nodeid or subset of hex nodeid
753 """
753 """
754 if isinstance(id, (long, int)):
754 if isinstance(id, (long, int)):
755 # rev
755 # rev
756 return self.node(id)
756 return self.node(id)
757 try:
757 try:
758 # str(rev)
758 # str(rev)
759 rev = int(id)
759 rev = int(id)
760 if str(rev) != id: raise ValueError
760 if str(rev) != id: raise ValueError
761 if rev < 0: rev = self.count() + rev
761 if rev < 0: rev = self.count() + rev
762 if rev < 0 or rev >= self.count(): raise ValueError
762 if rev < 0 or rev >= self.count(): raise ValueError
763 return self.node(rev)
763 return self.node(rev)
764 except (ValueError, OverflowError):
764 except (ValueError, OverflowError):
765 pass
765 pass
766 try:
766 try:
767 # hex(node)[:...]
767 # hex(node)[:...]
768 if len(id) % 2 == 0:
768 if len(id) % 2 == 0:
769 bin_id = bin(id)
769 bin_id = bin(id)
770 else:
770 else:
771 bin_id = bin(id[:-1])
771 bin_id = bin(id[:-1])
772 node = None
772 node = None
773 for n in self.nodemap:
773 for n in self.nodemap:
774 if n.startswith(bin_id) and hex(n).startswith(id):
774 if n.startswith(bin_id) and hex(n).startswith(id):
775 if node is not None:
775 if node is not None:
776 raise RevlogError(_("Ambiguous identifier"))
776 raise RevlogError(_("Ambiguous identifier"))
777 node = n
777 node = n
778 if node is not None:
778 if node is not None:
779 return node
779 return node
780 except TypeError:
780 except TypeError:
781 pass
781 pass
782
782
783 # might need fixing if we change hash lengths
783 # might need fixing if we change hash lengths
784 if len(id) == 20 and id in self.nodemap:
784 if len(id) == 20 and id in self.nodemap:
785 # node
785 # node
786 return id
786 return id
787
787
788 raise RevlogError(_("No match found"))
788 raise RevlogError(_("No match found"))
789
789
790 def cmp(self, node, text):
790 def cmp(self, node, text):
791 """compare text with a given file revision"""
791 """compare text with a given file revision"""
792 p1, p2 = self.parents(node)
792 p1, p2 = self.parents(node)
793 return hash(text, p1, p2) != node
793 return hash(text, p1, p2) != node
794
794
795 def makenode(self, node, text):
795 def makenode(self, node, text):
796 """calculate a file nodeid for text, descended or possibly
796 """calculate a file nodeid for text, descended or possibly
797 unchanged from node"""
797 unchanged from node"""
798
798
799 if self.cmp(node, text):
799 if self.cmp(node, text):
800 return hash(text, node, nullid)
800 return hash(text, node, nullid)
801 return node
801 return node
802
802
803 def diff(self, a, b):
803 def diff(self, a, b):
804 """return a delta between two revisions"""
804 """return a delta between two revisions"""
805 return mdiff.textdiff(a, b)
805 return mdiff.textdiff(a, b)
806
806
807 def patches(self, t, pl):
807 def patches(self, t, pl):
808 """apply a list of patches to a string"""
808 """apply a list of patches to a string"""
809 return mdiff.patches(t, pl)
809 return mdiff.patches(t, pl)
810
810
811 def chunk(self, rev, df=None, cachelen=4096):
811 def chunk(self, rev, df=None, cachelen=4096):
812 start, length = self.start(rev), self.length(rev)
812 start, length = self.start(rev), self.length(rev)
813 inline = self.inlinedata()
813 inline = self.inlinedata()
814 if inline:
814 if inline:
815 start += (rev + 1) * struct.calcsize(self.indexformat)
815 start += (rev + 1) * struct.calcsize(self.indexformat)
816 end = start + length
816 end = start + length
817 def loadcache(df):
817 def loadcache(df):
818 cache_length = max(cachelen, length) # 4k
818 cache_length = max(cachelen, length) # 4k
819 if not df:
819 if not df:
820 if inline:
820 if inline:
821 df = self.opener(self.indexfile)
821 df = self.opener(self.indexfile)
822 else:
822 else:
823 df = self.opener(self.datafile)
823 df = self.opener(self.datafile)
824 df.seek(start)
824 df.seek(start)
825 self.chunkcache = (start, df.read(cache_length))
825 self.chunkcache = (start, df.read(cache_length))
826
826
827 if not self.chunkcache:
827 if not self.chunkcache:
828 loadcache(df)
828 loadcache(df)
829
829
830 cache_start = self.chunkcache[0]
830 cache_start = self.chunkcache[0]
831 cache_end = cache_start + len(self.chunkcache[1])
831 cache_end = cache_start + len(self.chunkcache[1])
832 if start >= cache_start and end <= cache_end:
832 if start >= cache_start and end <= cache_end:
833 # it is cached
833 # it is cached
834 offset = start - cache_start
834 offset = start - cache_start
835 else:
835 else:
836 loadcache(df)
836 loadcache(df)
837 offset = 0
837 offset = 0
838
838
839 #def checkchunk():
839 #def checkchunk():
840 # df = self.opener(self.datafile)
840 # df = self.opener(self.datafile)
841 # df.seek(start)
841 # df.seek(start)
842 # return df.read(length)
842 # return df.read(length)
843 #assert s == checkchunk()
843 #assert s == checkchunk()
844 return decompress(self.chunkcache[1][offset:offset + length])
844 return decompress(self.chunkcache[1][offset:offset + length])
845
845
846 def delta(self, node):
846 def delta(self, node):
847 """return or calculate a delta between a node and its predecessor"""
847 """return or calculate a delta between a node and its predecessor"""
848 r = self.rev(node)
848 r = self.rev(node)
849 return self.revdiff(r - 1, r)
849 return self.revdiff(r - 1, r)
850
850
851 def revdiff(self, rev1, rev2):
851 def revdiff(self, rev1, rev2):
852 """return or calculate a delta between two revisions"""
852 """return or calculate a delta between two revisions"""
853 b1 = self.base(rev1)
853 b1 = self.base(rev1)
854 b2 = self.base(rev2)
854 b2 = self.base(rev2)
855 if b1 == b2 and rev1 + 1 == rev2:
855 if b1 == b2 and rev1 + 1 == rev2:
856 return self.chunk(rev2)
856 return self.chunk(rev2)
857 else:
857 else:
858 return self.diff(self.revision(self.node(rev1)),
858 return self.diff(self.revision(self.node(rev1)),
859 self.revision(self.node(rev2)))
859 self.revision(self.node(rev2)))
860
860
861 def revision(self, node):
861 def revision(self, node):
862 """return an uncompressed revision of a given"""
862 """return an uncompressed revision of a given"""
863 if node == nullid: return ""
863 if node == nullid: return ""
864 if self.cache and self.cache[0] == node: return self.cache[2]
864 if self.cache and self.cache[0] == node: return self.cache[2]
865
865
866 # look up what we need to read
866 # look up what we need to read
867 text = None
867 text = None
868 rev = self.rev(node)
868 rev = self.rev(node)
869 base = self.base(rev)
869 base = self.base(rev)
870
870
871 if self.inlinedata():
871 if self.inlinedata():
872 # we probably have the whole chunk cached
872 # we probably have the whole chunk cached
873 df = None
873 df = None
874 else:
874 else:
875 df = self.opener(self.datafile)
875 df = self.opener(self.datafile)
876
876
877 # do we have useful data cached?
877 # do we have useful data cached?
878 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
878 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
879 base = self.cache[1]
879 base = self.cache[1]
880 text = self.cache[2]
880 text = self.cache[2]
881 self.loadindex(base, rev + 1)
881 self.loadindex(base, rev + 1)
882 else:
882 else:
883 self.loadindex(base, rev + 1)
883 self.loadindex(base, rev + 1)
884 text = self.chunk(base, df=df)
884 text = self.chunk(base, df=df)
885
885
886 bins = []
886 bins = []
887 for r in xrange(base + 1, rev + 1):
887 for r in xrange(base + 1, rev + 1):
888 bins.append(self.chunk(r, df=df))
888 bins.append(self.chunk(r, df=df))
889
889
890 text = self.patches(text, bins)
890 text = self.patches(text, bins)
891
891
892 p1, p2 = self.parents(node)
892 p1, p2 = self.parents(node)
893 if node != hash(text, p1, p2):
893 if node != hash(text, p1, p2):
894 raise RevlogError(_("integrity check failed on %s:%d")
894 raise RevlogError(_("integrity check failed on %s:%d")
895 % (self.datafile, rev))
895 % (self.datafile, rev))
896
896
897 self.cache = (node, rev, text)
897 self.cache = (node, rev, text)
898 return text
898 return text
899
899
900 def checkinlinesize(self, tr, fp=None):
900 def checkinlinesize(self, tr, fp=None):
901 if not self.inlinedata():
901 if not self.inlinedata():
902 return
902 return
903 if not fp:
903 if not fp:
904 fp = self.opener(self.indexfile, 'r')
904 fp = self.opener(self.indexfile, 'r')
905 fp.seek(0, 2)
905 fp.seek(0, 2)
906 size = fp.tell()
906 size = fp.tell()
907 if size < 131072:
907 if size < 131072:
908 return
908 return
909 trinfo = tr.find(self.indexfile)
909 trinfo = tr.find(self.indexfile)
910 if trinfo == None:
910 if trinfo == None:
911 raise RevlogError(_("%s not found in the transaction" %
911 raise RevlogError(_("%s not found in the transaction" %
912 self.indexfile))
912 self.indexfile))
913
913
914 trindex = trinfo[2]
914 trindex = trinfo[2]
915 dataoff = self.start(trindex)
915 dataoff = self.start(trindex)
916
916
917 tr.add(self.datafile, dataoff)
917 tr.add(self.datafile, dataoff)
918 df = self.opener(self.datafile, 'w')
918 df = self.opener(self.datafile, 'w')
919 calc = struct.calcsize(self.indexformat)
919 calc = struct.calcsize(self.indexformat)
920 for r in xrange(self.count()):
920 for r in xrange(self.count()):
921 start = self.start(r) + (r + 1) * calc
921 start = self.start(r) + (r + 1) * calc
922 length = self.length(r)
922 length = self.length(r)
923 fp.seek(start)
923 fp.seek(start)
924 d = fp.read(length)
924 d = fp.read(length)
925 df.write(d)
925 df.write(d)
926 fp.close()
926 fp.close()
927 df.close()
927 df.close()
928 fp = self.opener(self.indexfile, 'w', atomictemp=True)
928 fp = self.opener(self.indexfile, 'w', atomictemp=True)
929 self.version &= ~(REVLOGNGINLINEDATA)
929 self.version &= ~(REVLOGNGINLINEDATA)
930 if self.count():
930 if self.count():
931 x = self.index[0]
931 x = self.index[0]
932 e = struct.pack(self.indexformat, *x)[4:]
932 e = struct.pack(self.indexformat, *x)[4:]
933 l = struct.pack(versionformat, self.version)
933 l = struct.pack(versionformat, self.version)
934 fp.write(l)
934 fp.write(l)
935 fp.write(e)
935 fp.write(e)
936
936
937 for i in xrange(1, self.count()):
937 for i in xrange(1, self.count()):
938 x = self.index[i]
938 x = self.index[i]
939 e = struct.pack(self.indexformat, *x)
939 e = struct.pack(self.indexformat, *x)
940 fp.write(e)
940 fp.write(e)
941
941
942 # if we don't call rename, the temp file will never replace the
942 # if we don't call rename, the temp file will never replace the
943 # real index
943 # real index
944 fp.rename()
944 fp.rename()
945
945
946 tr.replace(self.indexfile, trindex * calc)
946 tr.replace(self.indexfile, trindex * calc)
947 self.chunkcache = None
947 self.chunkcache = None
948
948
949 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
949 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
950 """add a revision to the log
950 """add a revision to the log
951
951
952 text - the revision data to add
952 text - the revision data to add
953 transaction - the transaction object used for rollback
953 transaction - the transaction object used for rollback
954 link - the linkrev data to add
954 link - the linkrev data to add
955 p1, p2 - the parent nodeids of the revision
955 p1, p2 - the parent nodeids of the revision
956 d - an optional precomputed delta
956 d - an optional precomputed delta
957 """
957 """
958 if text is None: text = ""
958 if text is None: text = ""
959 if p1 is None: p1 = self.tip()
959 if p1 is None: p1 = self.tip()
960 if p2 is None: p2 = nullid
960 if p2 is None: p2 = nullid
961
961
962 node = hash(text, p1, p2)
962 node = hash(text, p1, p2)
963
963
964 if node in self.nodemap:
964 if node in self.nodemap:
965 return node
965 return node
966
966
967 n = self.count()
967 n = self.count()
968 t = n - 1
968 t = n - 1
969
969
970 if n:
970 if n:
971 base = self.base(t)
971 base = self.base(t)
972 start = self.start(base)
972 start = self.start(base)
973 end = self.end(t)
973 end = self.end(t)
974 if not d:
974 if not d:
975 prev = self.revision(self.tip())
975 prev = self.revision(self.tip())
976 d = self.diff(prev, str(text))
976 d = self.diff(prev, text)
977 data = compress(d)
977 data = compress(d)
978 l = len(data[1]) + len(data[0])
978 l = len(data[1]) + len(data[0])
979 dist = end - start + l
979 dist = end - start + l
980
980
981 # full versions are inserted when the needed deltas
981 # full versions are inserted when the needed deltas
982 # become comparable to the uncompressed text
982 # become comparable to the uncompressed text
983 if not n or dist > len(text) * 2:
983 if not n or dist > len(text) * 2:
984 data = compress(text)
984 data = compress(text)
985 l = len(data[1]) + len(data[0])
985 l = len(data[1]) + len(data[0])
986 base = n
986 base = n
987 else:
987 else:
988 base = self.base(t)
988 base = self.base(t)
989
989
990 offset = 0
990 offset = 0
991 if t >= 0:
991 if t >= 0:
992 offset = self.end(t)
992 offset = self.end(t)
993
993
994 if self.version == REVLOGV0:
994 if self.version == REVLOGV0:
995 e = (offset, l, base, link, p1, p2, node)
995 e = (offset, l, base, link, p1, p2, node)
996 else:
996 else:
997 e = (self.offset_type(offset, 0), l, len(text),
997 e = (self.offset_type(offset, 0), l, len(text),
998 base, link, self.rev(p1), self.rev(p2), node)
998 base, link, self.rev(p1), self.rev(p2), node)
999
999
1000 self.index.append(e)
1000 self.index.append(e)
1001 self.nodemap[node] = n
1001 self.nodemap[node] = n
1002 entry = struct.pack(self.indexformat, *e)
1002 entry = struct.pack(self.indexformat, *e)
1003
1003
1004 if not self.inlinedata():
1004 if not self.inlinedata():
1005 transaction.add(self.datafile, offset)
1005 transaction.add(self.datafile, offset)
1006 transaction.add(self.indexfile, n * len(entry))
1006 transaction.add(self.indexfile, n * len(entry))
1007 f = self.opener(self.datafile, "a")
1007 f = self.opener(self.datafile, "a")
1008 if data[0]:
1008 if data[0]:
1009 f.write(data[0])
1009 f.write(data[0])
1010 f.write(data[1])
1010 f.write(data[1])
1011 f.close()
1011 f.close()
1012 f = self.opener(self.indexfile, "a")
1012 f = self.opener(self.indexfile, "a")
1013 else:
1013 else:
1014 f = self.opener(self.indexfile, "a+")
1014 f = self.opener(self.indexfile, "a+")
1015 f.seek(0, 2)
1015 f.seek(0, 2)
1016 transaction.add(self.indexfile, f.tell(), self.count() - 1)
1016 transaction.add(self.indexfile, f.tell(), self.count() - 1)
1017
1017
1018 if len(self.index) == 1 and self.version != REVLOGV0:
1018 if len(self.index) == 1 and self.version != REVLOGV0:
1019 l = struct.pack(versionformat, self.version)
1019 l = struct.pack(versionformat, self.version)
1020 f.write(l)
1020 f.write(l)
1021 entry = entry[4:]
1021 entry = entry[4:]
1022
1022
1023 f.write(entry)
1023 f.write(entry)
1024
1024
1025 if self.inlinedata():
1025 if self.inlinedata():
1026 f.write(data[0])
1026 f.write(data[0])
1027 f.write(data[1])
1027 f.write(data[1])
1028 self.checkinlinesize(transaction, f)
1028 self.checkinlinesize(transaction, f)
1029
1029
1030 self.cache = (node, n, text)
1030 self.cache = (node, n, text)
1031 return node
1031 return node
1032
1032
1033 def ancestor(self, a, b):
1033 def ancestor(self, a, b):
1034 """calculate the least common ancestor of nodes a and b"""
1034 """calculate the least common ancestor of nodes a and b"""
1035
1035
1036 def parents(rev):
1036 def parents(rev):
1037 return [p for p in self.parentrevs(rev) if p != -1]
1037 return [p for p in self.parentrevs(rev) if p != -1]
1038
1038
1039 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1039 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1040 if c is None:
1040 if c is None:
1041 return nullid
1041 return nullid
1042
1042
1043 return self.node(c)
1043 return self.node(c)
1044
1044
1045 def group(self, nodelist, lookup, infocollect=None):
1045 def group(self, nodelist, lookup, infocollect=None):
1046 """calculate a delta group
1046 """calculate a delta group
1047
1047
1048 Given a list of changeset revs, return a set of deltas and
1048 Given a list of changeset revs, return a set of deltas and
1049 metadata corresponding to nodes. the first delta is
1049 metadata corresponding to nodes. the first delta is
1050 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1050 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1051 have this parent as it has all history before these
1051 have this parent as it has all history before these
1052 changesets. parent is parent[0]
1052 changesets. parent is parent[0]
1053 """
1053 """
1054 revs = [self.rev(n) for n in nodelist]
1054 revs = [self.rev(n) for n in nodelist]
1055
1055
1056 # if we don't have any revisions touched by these changesets, bail
1056 # if we don't have any revisions touched by these changesets, bail
1057 if not revs:
1057 if not revs:
1058 yield changegroup.closechunk()
1058 yield changegroup.closechunk()
1059 return
1059 return
1060
1060
1061 # add the parent of the first rev
1061 # add the parent of the first rev
1062 p = self.parents(self.node(revs[0]))[0]
1062 p = self.parents(self.node(revs[0]))[0]
1063 revs.insert(0, self.rev(p))
1063 revs.insert(0, self.rev(p))
1064
1064
1065 # build deltas
1065 # build deltas
1066 for d in xrange(0, len(revs) - 1):
1066 for d in xrange(0, len(revs) - 1):
1067 a, b = revs[d], revs[d + 1]
1067 a, b = revs[d], revs[d + 1]
1068 nb = self.node(b)
1068 nb = self.node(b)
1069
1069
1070 if infocollect is not None:
1070 if infocollect is not None:
1071 infocollect(nb)
1071 infocollect(nb)
1072
1072
1073 d = self.revdiff(a, b)
1073 d = self.revdiff(a, b)
1074 p = self.parents(nb)
1074 p = self.parents(nb)
1075 meta = nb + p[0] + p[1] + lookup(nb)
1075 meta = nb + p[0] + p[1] + lookup(nb)
1076 yield changegroup.genchunk("%s%s" % (meta, d))
1076 yield changegroup.genchunk("%s%s" % (meta, d))
1077
1077
1078 yield changegroup.closechunk()
1078 yield changegroup.closechunk()
1079
1079
1080 def addgroup(self, revs, linkmapper, transaction, unique=0):
1080 def addgroup(self, revs, linkmapper, transaction, unique=0):
1081 """
1081 """
1082 add a delta group
1082 add a delta group
1083
1083
1084 given a set of deltas, add them to the revision log. the
1084 given a set of deltas, add them to the revision log. the
1085 first delta is against its parent, which should be in our
1085 first delta is against its parent, which should be in our
1086 log, the rest are against the previous delta.
1086 log, the rest are against the previous delta.
1087 """
1087 """
1088
1088
1089 #track the base of the current delta log
1089 #track the base of the current delta log
1090 r = self.count()
1090 r = self.count()
1091 t = r - 1
1091 t = r - 1
1092 node = None
1092 node = None
1093
1093
1094 base = prev = -1
1094 base = prev = -1
1095 start = end = textlen = 0
1095 start = end = textlen = 0
1096 if r:
1096 if r:
1097 end = self.end(t)
1097 end = self.end(t)
1098
1098
1099 ifh = self.opener(self.indexfile, "a+")
1099 ifh = self.opener(self.indexfile, "a+")
1100 ifh.seek(0, 2)
1100 ifh.seek(0, 2)
1101 transaction.add(self.indexfile, ifh.tell(), self.count())
1101 transaction.add(self.indexfile, ifh.tell(), self.count())
1102 if self.inlinedata():
1102 if self.inlinedata():
1103 dfh = None
1103 dfh = None
1104 else:
1104 else:
1105 transaction.add(self.datafile, end)
1105 transaction.add(self.datafile, end)
1106 dfh = self.opener(self.datafile, "a")
1106 dfh = self.opener(self.datafile, "a")
1107
1107
1108 # loop through our set of deltas
1108 # loop through our set of deltas
1109 chain = None
1109 chain = None
1110 for chunk in revs:
1110 for chunk in revs:
1111 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1111 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1112 link = linkmapper(cs)
1112 link = linkmapper(cs)
1113 if node in self.nodemap:
1113 if node in self.nodemap:
1114 # this can happen if two branches make the same change
1114 # this can happen if two branches make the same change
1115 # if unique:
1115 # if unique:
1116 # raise RevlogError(_("already have %s") % hex(node[:4]))
1116 # raise RevlogError(_("already have %s") % hex(node[:4]))
1117 chain = node
1117 chain = node
1118 continue
1118 continue
1119 delta = chunk[80:]
1119 delta = chunk[80:]
1120
1120
1121 for p in (p1, p2):
1121 for p in (p1, p2):
1122 if not p in self.nodemap:
1122 if not p in self.nodemap:
1123 raise RevlogError(_("unknown parent %s") % short(p))
1123 raise RevlogError(_("unknown parent %s") % short(p))
1124
1124
1125 if not chain:
1125 if not chain:
1126 # retrieve the parent revision of the delta chain
1126 # retrieve the parent revision of the delta chain
1127 chain = p1
1127 chain = p1
1128 if not chain in self.nodemap:
1128 if not chain in self.nodemap:
1129 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1129 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1130
1130
1131 # full versions are inserted when the needed deltas become
1131 # full versions are inserted when the needed deltas become
1132 # comparable to the uncompressed text or when the previous
1132 # comparable to the uncompressed text or when the previous
1133 # version is not the one we have a delta against. We use
1133 # version is not the one we have a delta against. We use
1134 # the size of the previous full rev as a proxy for the
1134 # the size of the previous full rev as a proxy for the
1135 # current size.
1135 # current size.
1136
1136
1137 if chain == prev:
1137 if chain == prev:
1138 tempd = compress(delta)
1138 tempd = compress(delta)
1139 cdelta = tempd[0] + tempd[1]
1139 cdelta = tempd[0] + tempd[1]
1140 textlen = mdiff.patchedsize(textlen, delta)
1140 textlen = mdiff.patchedsize(textlen, delta)
1141
1141
1142 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1142 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1143 # flush our writes here so we can read it in revision
1143 # flush our writes here so we can read it in revision
1144 if dfh:
1144 if dfh:
1145 dfh.flush()
1145 dfh.flush()
1146 ifh.flush()
1146 ifh.flush()
1147 text = self.revision(chain)
1147 text = self.revision(chain)
1148 text = self.patches(text, [delta])
1148 text = self.patches(text, [delta])
1149 chk = self.addrevision(text, transaction, link, p1, p2)
1149 chk = self.addrevision(text, transaction, link, p1, p2)
1150 if chk != node:
1150 if chk != node:
1151 raise RevlogError(_("consistency error adding group"))
1151 raise RevlogError(_("consistency error adding group"))
1152 textlen = len(text)
1152 textlen = len(text)
1153 else:
1153 else:
1154 if self.version == REVLOGV0:
1154 if self.version == REVLOGV0:
1155 e = (end, len(cdelta), base, link, p1, p2, node)
1155 e = (end, len(cdelta), base, link, p1, p2, node)
1156 else:
1156 else:
1157 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1157 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1158 link, self.rev(p1), self.rev(p2), node)
1158 link, self.rev(p1), self.rev(p2), node)
1159 self.index.append(e)
1159 self.index.append(e)
1160 self.nodemap[node] = r
1160 self.nodemap[node] = r
1161 if self.inlinedata():
1161 if self.inlinedata():
1162 ifh.write(struct.pack(self.indexformat, *e))
1162 ifh.write(struct.pack(self.indexformat, *e))
1163 ifh.write(cdelta)
1163 ifh.write(cdelta)
1164 self.checkinlinesize(transaction, ifh)
1164 self.checkinlinesize(transaction, ifh)
1165 if not self.inlinedata():
1165 if not self.inlinedata():
1166 dfh = self.opener(self.datafile, "a")
1166 dfh = self.opener(self.datafile, "a")
1167 ifh = self.opener(self.indexfile, "a")
1167 ifh = self.opener(self.indexfile, "a")
1168 else:
1168 else:
1169 if not dfh:
1169 if not dfh:
1170 # addrevision switched from inline to conventional
1170 # addrevision switched from inline to conventional
1171 # reopen the index
1171 # reopen the index
1172 dfh = self.opener(self.datafile, "a")
1172 dfh = self.opener(self.datafile, "a")
1173 ifh = self.opener(self.indexfile, "a")
1173 ifh = self.opener(self.indexfile, "a")
1174 dfh.write(cdelta)
1174 dfh.write(cdelta)
1175 ifh.write(struct.pack(self.indexformat, *e))
1175 ifh.write(struct.pack(self.indexformat, *e))
1176
1176
1177 t, r, chain, prev = r, r + 1, node, node
1177 t, r, chain, prev = r, r + 1, node, node
1178 base = self.base(t)
1178 base = self.base(t)
1179 start = self.start(base)
1179 start = self.start(base)
1180 end = self.end(t)
1180 end = self.end(t)
1181
1181
1182 return node
1182 return node
1183
1183
1184 def strip(self, rev, minlink):
1184 def strip(self, rev, minlink):
1185 if self.count() == 0 or rev >= self.count():
1185 if self.count() == 0 or rev >= self.count():
1186 return
1186 return
1187
1187
1188 if isinstance(self.index, lazyindex):
1188 if isinstance(self.index, lazyindex):
1189 self.loadindexmap()
1189 self.loadindexmap()
1190
1190
1191 # When stripping away a revision, we need to make sure it
1191 # When stripping away a revision, we need to make sure it
1192 # does not actually belong to an older changeset.
1192 # does not actually belong to an older changeset.
1193 # The minlink parameter defines the oldest revision
1193 # The minlink parameter defines the oldest revision
1194 # we're allowed to strip away.
1194 # we're allowed to strip away.
1195 while minlink > self.index[rev][-4]:
1195 while minlink > self.index[rev][-4]:
1196 rev += 1
1196 rev += 1
1197 if rev >= self.count():
1197 if rev >= self.count():
1198 return
1198 return
1199
1199
1200 # first truncate the files on disk
1200 # first truncate the files on disk
1201 end = self.start(rev)
1201 end = self.start(rev)
1202 if not self.inlinedata():
1202 if not self.inlinedata():
1203 df = self.opener(self.datafile, "a")
1203 df = self.opener(self.datafile, "a")
1204 df.truncate(end)
1204 df.truncate(end)
1205 end = rev * struct.calcsize(self.indexformat)
1205 end = rev * struct.calcsize(self.indexformat)
1206 else:
1206 else:
1207 end += rev * struct.calcsize(self.indexformat)
1207 end += rev * struct.calcsize(self.indexformat)
1208
1208
1209 indexf = self.opener(self.indexfile, "a")
1209 indexf = self.opener(self.indexfile, "a")
1210 indexf.truncate(end)
1210 indexf.truncate(end)
1211
1211
1212 # then reset internal state in memory to forget those revisions
1212 # then reset internal state in memory to forget those revisions
1213 self.cache = None
1213 self.cache = None
1214 self.chunkcache = None
1214 self.chunkcache = None
1215 for x in xrange(rev, self.count()):
1215 for x in xrange(rev, self.count()):
1216 del self.nodemap[self.node(x)]
1216 del self.nodemap[self.node(x)]
1217
1217
1218 del self.index[rev:]
1218 del self.index[rev:]
1219
1219
1220 def checksize(self):
1220 def checksize(self):
1221 expected = 0
1221 expected = 0
1222 if self.count():
1222 if self.count():
1223 expected = self.end(self.count() - 1)
1223 expected = self.end(self.count() - 1)
1224
1224
1225 try:
1225 try:
1226 f = self.opener(self.datafile)
1226 f = self.opener(self.datafile)
1227 f.seek(0, 2)
1227 f.seek(0, 2)
1228 actual = f.tell()
1228 actual = f.tell()
1229 dd = actual - expected
1229 dd = actual - expected
1230 except IOError, inst:
1230 except IOError, inst:
1231 if inst.errno != errno.ENOENT:
1231 if inst.errno != errno.ENOENT:
1232 raise
1232 raise
1233 dd = 0
1233 dd = 0
1234
1234
1235 try:
1235 try:
1236 f = self.opener(self.indexfile)
1236 f = self.opener(self.indexfile)
1237 f.seek(0, 2)
1237 f.seek(0, 2)
1238 actual = f.tell()
1238 actual = f.tell()
1239 s = struct.calcsize(self.indexformat)
1239 s = struct.calcsize(self.indexformat)
1240 i = actual / s
1240 i = actual / s
1241 di = actual - (i * s)
1241 di = actual - (i * s)
1242 if self.inlinedata():
1242 if self.inlinedata():
1243 databytes = 0
1243 databytes = 0
1244 for r in xrange(self.count()):
1244 for r in xrange(self.count()):
1245 databytes += self.length(r)
1245 databytes += self.length(r)
1246 dd = 0
1246 dd = 0
1247 di = actual - self.count() * s - databytes
1247 di = actual - self.count() * s - databytes
1248 except IOError, inst:
1248 except IOError, inst:
1249 if inst.errno != errno.ENOENT:
1249 if inst.errno != errno.ENOENT:
1250 raise
1250 raise
1251 di = 0
1251 di = 0
1252
1252
1253 return (dd, di)
1253 return (dd, di)
1254
1254
1255
1255
General Comments 0
You need to be logged in to leave comments. Login now