Fill in the uncompressed size during revlog.addgroup...
mason@suse.com
r2078:441ea218 default
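In short: mpatch gains a patchedsize() function that computes the uncompressed size a text would have after a binary delta is applied, without building the text; mdiff re-exports it, and revlog.addgroup() uses it to keep the uncompressed length current while appending delta groups, recording it in the index where -1 used to be written. A minimal sketch of the idea, assuming mercurial's mdiff module is importable (the helper itself is illustrative, not part of the change):

    import mdiff

    def runningsize(baselen, deltas):
        # fold each binary delta into a running uncompressed length,
        # never materializing the intermediate texts
        l = baselen
        for d in deltas:
            l = mdiff.patchedsize(l, d)
        return l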
@@ -1,195 +1,196 @@ mdiff.py
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 from demandload import demandload
9 9 import struct, bdiff, util, mpatch
10 10 demandload(globals(), "re")
11 11
12 12
13 13 def unidiff(a, ad, b, bd, fn, r=None, text=False,
14 14 showfunc=False, ignorews=False):
15 15
16 16 if not a and not b: return ""
17 17 epoch = util.datestr((0, 0))
18 18
19 19 if not text and (util.binary(a) or util.binary(b)):
20 20 l = ['Binary file %s has changed\n' % fn]
21 21 elif not a:
22 22 b = b.splitlines(1)
23 23 if a is None:
24 24 l1 = "--- %s\t%s\n" % ("/dev/null", epoch)
25 25 else:
26 26 l1 = "--- %s\t%s\n" % ("a/" + fn, ad)
27 27 l2 = "+++ %s\t%s\n" % ("b/" + fn, bd)
28 28 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
29 29 l = [l1, l2, l3] + ["+" + e for e in b]
30 30 elif not b:
31 31 a = a.splitlines(1)
32 32 l1 = "--- %s\t%s\n" % ("a/" + fn, ad)
33 33 if b is None:
34 34 l2 = "+++ %s\t%s\n" % ("/dev/null", epoch)
35 35 else:
36 36 l2 = "+++ %s\t%s\n" % ("b/" + fn, bd)
37 37 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
38 38 l = [l1, l2, l3] + ["-" + e for e in a]
39 39 else:
40 40 al = a.splitlines(1)
41 41 bl = b.splitlines(1)
42 42 l = list(bunidiff(a, b, al, bl, "a/" + fn, "b/" + fn,
43 43 showfunc=showfunc, ignorews=ignorews))
44 44 if not l: return ""
45 45 # difflib uses a space, rather than a tab
46 46 l[0] = "%s\t%s\n" % (l[0][:-2], ad)
47 47 l[1] = "%s\t%s\n" % (l[1][:-2], bd)
48 48
49 49 for ln in xrange(len(l)):
50 50 if l[ln][-1] != '\n':
51 51 l[ln] += "\n\ No newline at end of file\n"
52 52
53 53 if r:
54 54 l.insert(0, "diff %s %s\n" %
55 55 (' '.join(["-r %s" % rev for rev in r]), fn))
56 56
57 57 return "".join(l)
58 58
59 59 # somewhat self contained replacement for difflib.unified_diff
60 60 # t1 and t2 are the text to be diffed
61 61 # l1 and l2 are the text broken up into lines
62 62 # header1 and header2 are the filenames for the diff output
63 63 # context is the number of context lines
64 64 # showfunc enables diff -p output
65 65 # ignorews ignores all whitespace changes in the diff
66 66 def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,
67 67 ignorews=False):
68 68 def contextend(l, len):
69 69 ret = l + context
70 70 if ret > len:
71 71 ret = len
72 72 return ret
73 73
74 74 def contextstart(l):
75 75 ret = l - context
76 76 if ret < 0:
77 77 return 0
78 78 return ret
79 79
80 80 def yieldhunk(hunk, header):
81 81 if header:
82 82 for x in header:
83 83 yield x
84 84 (astart, a2, bstart, b2, delta) = hunk
85 85 aend = contextend(a2, len(l1))
86 86 alen = aend - astart
87 87 blen = b2 - bstart + aend - a2
88 88
89 89 func = ""
90 90 if showfunc:
91 91 # walk backwards from the start of the context
92 92 # to find a line starting with an alphanumeric char.
93 93 for x in xrange(astart, -1, -1):
94 94 t = l1[x].rstrip()
95 95 if funcre.match(t):
96 96 func = ' ' + t[:40]
97 97 break
98 98
99 99 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
100 100 bstart + 1, blen, func)
101 101 for x in delta:
102 102 yield x
103 103 for x in xrange(a2, aend):
104 104 yield ' ' + l1[x]
105 105
106 106 header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]
107 107
108 108 if showfunc:
109 109 funcre = re.compile('\w')
110 110 if ignorews:
111 111 wsre = re.compile('[ \t]')
112 112
113 113 # bdiff.blocks gives us the matching sequences in the files. The loop
114 114 # below finds the spaces between those matching sequences and translates
115 115 # them into diff output.
116 116 #
117 117 diff = bdiff.blocks(t1, t2)
118 118 hunk = None
119 119 for i in xrange(len(diff)):
120 120 # The first match is special.
121 121 # we've either found a match starting at line 0 or a match later
122 122 # in the file. If it starts later, old and new below will both be
123 123 # empty and we'll continue to the next match.
124 124 if i > 0:
125 125 s = diff[i-1]
126 126 else:
127 127 s = [0, 0, 0, 0]
128 128 delta = []
129 129 s1 = diff[i]
130 130 a1 = s[1]
131 131 a2 = s1[0]
132 132 b1 = s[3]
133 133 b2 = s1[2]
134 134
135 135 old = l1[a1:a2]
136 136 new = l2[b1:b2]
137 137
138 138 # bdiff sometimes gives huge matches past eof, this check eats them,
139 139 # and deals with the special first match case described above
140 140 if not old and not new:
141 141 continue
142 142
143 143 if ignorews:
144 144 wsold = wsre.sub('', "".join(old))
145 145 wsnew = wsre.sub('', "".join(new))
146 146 if wsold == wsnew:
147 147 continue
148 148
149 149 astart = contextstart(a1)
150 150 bstart = contextstart(b1)
151 151 prev = None
152 152 if hunk:
153 153 # join with the previous hunk if it falls inside the context
154 154 if astart < hunk[1] + context + 1:
155 155 prev = hunk
156 156 astart = hunk[1]
157 157 bstart = hunk[3]
158 158 else:
159 159 for x in yieldhunk(hunk, header):
160 160 yield x
161 161 # we only want to yield the header if the files differ, and
162 162 # we only want to yield it once.
163 163 header = None
164 164 if prev:
165 165 # we've joined the previous hunk, record the new ending points.
166 166 hunk[1] = a2
167 167 hunk[3] = b2
168 168 delta = hunk[4]
169 169 else:
170 170 # create a new hunk
171 171 hunk = [ astart, a2, bstart, b2, delta ]
172 172
173 173 delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
174 174 delta[len(delta):] = [ '-' + x for x in old ]
175 175 delta[len(delta):] = [ '+' + x for x in new ]
176 176
177 177 if hunk:
178 178 for x in yieldhunk(hunk, header):
179 179 yield x
180 180
181 181 def patchtext(bin):
182 182 pos = 0
183 183 t = []
184 184 while pos < len(bin):
185 185 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
186 186 pos += 12
187 187 t.append(bin[pos:pos + l])
188 188 pos += l
189 189 return "".join(t)
190 190
191 191 def patch(a, bin):
192 192 return mpatch.patches(a, [bin])
193 193
194 194 patches = mpatch.patches
195 patchedsize = mpatch.patchedsize
195 196 textdiff = bdiff.bdiff
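The binary delta format shared by patch(), patchtext() and the newly exported patchedsize is a sequence of fragments: a 12-byte big-endian (start, end, length) header followed by length bytes of replacement data for orig[start:end]. A pure-Python sketch of building one fragment and of the size computation the new C function performs (both helpers are illustrative):

    import struct

    def makefragment(start, end, data):
        # replace orig[start:end] with data
        return struct.pack(">lll", start, end, len(data)) + data

    def purepatchedsize(origlen, bin):
        # mirror of mpatch.patchedsize(): walk the headers, skip the data
        pos = outlen = last = 0
        while pos < len(bin):
            start, end, l = struct.unpack(">lll", bin[pos:pos + 12])
            pos += 12 + l
            outlen += start - last + l
            last = end
        return outlen + origlen - last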
@@ -1,368 +1,404 @@ mpatch.c
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26 #ifdef _WIN32
27 27 #ifdef _MSC_VER
28 28 #define inline __inline
29 29 typedef unsigned long uint32_t;
30 30 #else
31 31 #include <stdint.h>
32 32 #endif
33 33 static uint32_t ntohl(uint32_t x)
34 34 {
35 35 return ((x & 0x000000ffUL) << 24) |
36 36 ((x & 0x0000ff00UL) << 8) |
37 37 ((x & 0x00ff0000UL) >> 8) |
38 38 ((x & 0xff000000UL) >> 24);
39 39 }
40 40 #else
41 41 #include <sys/types.h>
42 42 #include <arpa/inet.h>
43 43 #endif
44 44
45 45 static char mpatch_doc[] = "Efficient binary patching.";
46 46 static PyObject *mpatch_Error;
47 47
48 48 struct frag {
49 49 int start, end, len;
50 50 char *data;
51 51 };
52 52
53 53 struct flist {
54 54 struct frag *base, *head, *tail;
55 55 };
56 56
57 57 static struct flist *lalloc(int size)
58 58 {
59 59 struct flist *a = NULL;
60 60
61 61 a = (struct flist *)malloc(sizeof(struct flist));
62 62 if (a) {
63 63 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
64 64 if (!a->base) {
65 65 free(a);
66 66 a = NULL;
67 67 } else
68 68 a->head = a->tail = a->base;
69 69 return a;
70 70 }
71 71 if (!PyErr_Occurred())
72 72 PyErr_NoMemory();
73 73 return NULL;
74 74 }
75 75
76 76 static void lfree(struct flist *a)
77 77 {
78 78 if (a) {
79 79 free(a->base);
80 80 free(a);
81 81 }
82 82 }
83 83
84 84 static int lsize(struct flist *a)
85 85 {
86 86 return a->tail - a->head;
87 87 }
88 88
89 89 /* move hunks in source that start before cut to dest, compensating
90 90 for changes in offset. the last hunk may be split if necessary.
91 91 */
92 92 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
93 93 {
94 94 struct frag *d = dest->tail, *s = src->head;
95 95 int postend, c, l;
96 96
97 97 while (s != src->tail) {
98 98 if (s->start + offset >= cut)
99 99 break; /* we've gone far enough */
100 100
101 101 postend = offset + s->start + s->len;
102 102 if (postend <= cut) {
103 103 /* save this hunk */
104 104 offset += s->start + s->len - s->end;
105 105 *d++ = *s++;
106 106 }
107 107 else {
108 108 /* break up this hunk */
109 109 c = cut - offset;
110 110 if (s->end < c)
111 111 c = s->end;
112 112 l = cut - offset - s->start;
113 113 if (s->len < l)
114 114 l = s->len;
115 115
116 116 offset += s->start + l - c;
117 117
118 118 d->start = s->start;
119 119 d->end = c;
120 120 d->len = l;
121 121 d->data = s->data;
122 122 d++;
123 123 s->start = c;
124 124 s->len = s->len - l;
125 125 s->data = s->data + l;
126 126
127 127 break;
128 128 }
129 129 }
130 130
131 131 dest->tail = d;
132 132 src->head = s;
133 133 return offset;
134 134 }
135 135
136 136 /* like gather, but with no output list */
137 137 static int discard(struct flist *src, int cut, int offset)
138 138 {
139 139 struct frag *s = src->head;
140 140 int postend, c, l;
141 141
142 142 while (s != src->tail) {
143 143 if (s->start + offset >= cut)
144 144 break;
145 145
146 146 postend = offset + s->start + s->len;
147 147 if (postend <= cut) {
148 148 offset += s->start + s->len - s->end;
149 149 s++;
150 150 }
151 151 else {
152 152 c = cut - offset;
153 153 if (s->end < c)
154 154 c = s->end;
155 155 l = cut - offset - s->start;
156 156 if (s->len < l)
157 157 l = s->len;
158 158
159 159 offset += s->start + l - c;
160 160 s->start = c;
161 161 s->len = s->len - l;
162 162 s->data = s->data + l;
163 163
164 164 break;
165 165 }
166 166 }
167 167
168 168 src->head = s;
169 169 return offset;
170 170 }
171 171
172 172 /* combine hunk lists a and b, while adjusting b for offset changes in a;
173 173 this deletes a and b and returns the resultant list. */
174 174 static struct flist *combine(struct flist *a, struct flist *b)
175 175 {
176 176 struct flist *c = NULL;
177 177 struct frag *bh, *ct;
178 178 int offset = 0, post;
179 179
180 180 if (a && b)
181 181 c = lalloc((lsize(a) + lsize(b)) * 2);
182 182
183 183 if (c) {
184 184
185 185 for (bh = b->head; bh != b->tail; bh++) {
186 186 /* save old hunks */
187 187 offset = gather(c, a, bh->start, offset);
188 188
189 189 /* discard replaced hunks */
190 190 post = discard(a, bh->end, offset);
191 191
192 192 /* insert new hunk */
193 193 ct = c->tail;
194 194 ct->start = bh->start - offset;
195 195 ct->end = bh->end - post;
196 196 ct->len = bh->len;
197 197 ct->data = bh->data;
198 198 c->tail++;
199 199 offset = post;
200 200 }
201 201
202 202 /* hold on to tail from a */
203 203 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
204 204 c->tail += lsize(a);
205 205 }
206 206
207 207 lfree(a);
208 208 lfree(b);
209 209 return c;
210 210 }
211 211
212 212 /* decode a binary patch into a hunk list */
213 213 static struct flist *decode(char *bin, int len)
214 214 {
215 215 struct flist *l;
216 216 struct frag *lt;
217 217 char *end = bin + len;
218 218 char decode[12]; /* for dealing with alignment issues */
219 219
220 220 /* assume worst case size, we won't have many of these lists */
221 221 l = lalloc(len / 12);
222 222 if (!l)
223 223 return NULL;
224 224
225 225 lt = l->tail;
226 226
227 227 while (bin < end) {
228 228 memcpy(decode, bin, 12);
229 229 lt->start = ntohl(*(uint32_t *)decode);
230 230 lt->end = ntohl(*(uint32_t *)(decode + 4));
231 231 lt->len = ntohl(*(uint32_t *)(decode + 8));
232 232 lt->data = bin + 12;
233 233 bin += 12 + lt->len;
234 234 lt++;
235 235 }
236 236
237 237 if (bin != end) {
238 238 if (!PyErr_Occurred())
239 239 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
240 240 lfree(l);
241 241 return NULL;
242 242 }
243 243
244 244 l->tail = lt;
245 245 return l;
246 246 }
247 247
248 248 /* calculate the size of resultant text */
249 249 static int calcsize(int len, struct flist *l)
250 250 {
251 251 int outlen = 0, last = 0;
252 252 struct frag *f = l->head;
253 253
254 254 while (f != l->tail) {
255 255 if (f->start < last || f->end > len) {
256 256 if (!PyErr_Occurred())
257 257 PyErr_SetString(mpatch_Error,
258 258 "invalid patch");
259 259 return -1;
260 260 }
261 261 outlen += f->start - last;
262 262 last = f->end;
263 263 outlen += f->len;
264 264 f++;
265 265 }
266 266
267 267 outlen += len - last;
268 268 return outlen;
269 269 }
270 270
271 271 static int apply(char *buf, char *orig, int len, struct flist *l)
272 272 {
273 273 struct frag *f = l->head;
274 274 int last = 0;
275 275 char *p = buf;
276 276
277 277 while (f != l->tail) {
278 278 if (f->start < last || f->end > len) {
279 279 if (!PyErr_Occurred())
280 280 PyErr_SetString(mpatch_Error,
281 281 "invalid patch");
282 282 return 0;
283 283 }
284 284 memcpy(p, orig + last, f->start - last);
285 285 p += f->start - last;
286 286 memcpy(p, f->data, f->len);
287 287 last = f->end;
288 288 p += f->len;
289 289 f++;
290 290 }
291 291 memcpy(p, orig + last, len - last);
292 292 return 1;
293 293 }
294 294
295 295 /* recursively generate a patch of all bins between start and end */
296 296 static struct flist *fold(PyObject *bins, int start, int end)
297 297 {
298 298 int len;
299 299
300 300 if (start + 1 == end) {
301 301 /* trivial case, output a decoded list */
302 302 PyObject *tmp = PyList_GetItem(bins, start);
303 303 if (!tmp)
304 304 return NULL;
305 305 return decode(PyString_AsString(tmp), PyString_Size(tmp));
306 306 }
307 307
308 308 /* divide and conquer, memory management is elsewhere */
309 309 len = (end - start) / 2;
310 310 return combine(fold(bins, start, start + len),
311 311 fold(bins, start + len, end));
312 312 }
313 313
314 314 static PyObject *
315 315 patches(PyObject *self, PyObject *args)
316 316 {
317 317 PyObject *text, *bins, *result;
318 318 struct flist *patch;
319 319 char *in, *out;
320 320 int len, outlen;
321 321
322 322 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
323 323 return NULL;
324 324
325 325 len = PyList_Size(bins);
326 326 if (!len) {
327 327 /* nothing to do */
328 328 Py_INCREF(text);
329 329 return text;
330 330 }
331 331
332 332 patch = fold(bins, 0, len);
333 333 if (!patch)
334 334 return NULL;
335 335
336 336 outlen = calcsize(PyString_Size(text), patch);
337 337 if (outlen < 0) {
338 338 result = NULL;
339 339 goto cleanup;
340 340 }
341 341 result = PyString_FromStringAndSize(NULL, outlen);
342 342 if (!result) {
343 343 result = NULL;
344 344 goto cleanup;
345 345 }
346 346 in = PyString_AsString(text);
347 347 out = PyString_AsString(result);
348 348 if (!apply(out, in, PyString_Size(text), patch)) {
349 349 Py_DECREF(result);
350 350 result = NULL;
351 351 }
352 352 cleanup:
353 353 lfree(patch);
354 354 return result;
355 355 }
356 356
357 /* calculate size of a patched file directly */
358 static PyObject *
359 patchedsize(PyObject *self, PyObject *args)
360 {
361 long orig, start, end, len, outlen = 0, last = 0;
362 int patchlen;
363 char *bin, *binend;
364 char decode[12]; /* for dealing with alignment issues */
365
366 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
367 return NULL;
368
369 binend = bin + patchlen;
370
371 while (bin < binend) {
372 memcpy(decode, bin, 12);
373 start = ntohl(*(uint32_t *)decode);
374 end = ntohl(*(uint32_t *)(decode + 4));
375 len = ntohl(*(uint32_t *)(decode + 8));
376 bin += 12 + len;
377 outlen += start - last;
378 last = end;
379 outlen += len;
380 }
381
382 if (bin != binend) {
383 if (!PyErr_Occurred())
384 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
385 return NULL;
386 }
387
388 outlen += orig - last;
389 return Py_BuildValue("l", outlen);
390 }
391
357 392 static PyMethodDef methods[] = {
358 393 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
394 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
359 395 {NULL, NULL}
360 396 };
361 397
362 398 PyMODINIT_FUNC
363 399 initmpatch(void)
364 400 {
365 401 Py_InitModule3("mpatch", methods, mpatch_doc);
366 402 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
367 403 }
368 404
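The treewise recursion described in the file's header comment is easiest to see stripped of the C memory management: fold() either decodes a single patch into a hunk list or splits the range in half and merges the two combined halves, so n patches collapse into one hunk list after O(log n) levels of combining and a single linear application. A schematic Python rendering, where decode and combine stand in for the C functions above:

    def fold(bins, start, end, decode, combine):
        if start + 1 == end:
            # trivial case: one patch, one decoded hunk list
            return decode(bins[start])
        mid = start + (end - start) // 2
        return combine(fold(bins, start, mid, decode, combine),
                       fold(bins, mid, end, decode, combine))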
@@ -1,1068 +1,1101 @@ revlog.py
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25
26 26 def flagstr(flag):
27 27 if flag == "inline":
28 28 return REVLOGNGINLINEDATA
29 29 raise RevlogError(_("unknown revlog flag %s" % flag))
30 30
31 31 def hash(text, p1, p2):
32 32 """generate a hash from the given text and its parent hashes
33 33
34 34 This hash combines both the current file contents and its history
35 35 in a manner that makes it easy to distinguish nodes with the same
36 36 content in the revision graph.
37 37 """
38 38 l = [p1, p2]
39 39 l.sort()
40 40 s = sha.new(l[0])
41 41 s.update(l[1])
42 42 s.update(text)
43 43 return s.digest()
44 44
45 45 def compress(text):
46 46 """ generate a possibly-compressed representation of text """
47 47 if not text: return ("", text)
48 48 if len(text) < 44:
49 49 if text[0] == '\0': return ("", text)
50 50 return ('u', text)
51 51 bin = zlib.compress(text)
52 52 if len(bin) > len(text):
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 return ("", bin)
56 56
57 57 def decompress(bin):
58 58 """ decompress the given input """
59 59 if not bin: return bin
60 60 t = bin[0]
61 61 if t == '\0': return bin
62 62 if t == 'x': return zlib.decompress(bin)
63 63 if t == 'u': return bin[1:]
64 64 raise RevlogError(_("unknown compression type %r") % t)
65 65
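Note the storage convention: compress() returns a (header, data) pair whose concatenation is what actually gets written (callers do f.write(data[0]); f.write(data[1])), while decompress() dispatches on the first stored byte: NUL for literal text, 'x' for a zlib stream, 'u' for an explicitly uncompressed chunk. A quick roundtrip sketch:

    text = "some revision text\n" * 10
    head, data = compress(text)
    stored = head + data            # the bytes actually written to the revlog
    assert decompress(stored) == text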
66 66 indexformatv0 = ">4l20s20s20s"
67 67 # index ng:
68 68 # 6 bytes offset
69 69 # 2 bytes flags
70 70 # 4 bytes compressed length
71 71 # 4 bytes uncompressed length
72 72 # 4 bytes: base rev
73 73 # 4 bytes link rev
74 74 # 4 bytes parent 1 rev
75 75 # 4 bytes parent 2 rev
76 76 # 32 bytes: nodeid
77 77 indexformatng = ">Qiiiiii20s12x"
78 78 versionformat = ">i"
79 79
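A sketch of pulling one revlogng entry apart with the format above: the first eight bytes pack a 48-bit data offset with a 16-bit type/flags word, which ngoffset()/ngtype() below split apart. Field names here are descriptive, not from the source:

    import struct

    s = struct.calcsize(indexformatng)         # 64 bytes per entry
    def parseentry(data):
        (offsetflags, complen, uncomplen, base, link,
         p1rev, p2rev, node) = struct.unpack(indexformatng, data[:s])
        offset = long(offsetflags >> 16)       # cf. ngoffset()
        flags = int(offsetflags & 0xFFFF)      # cf. ngtype()
        return offset, flags, complen, uncomplen, base, link, p1rev, p2rev, node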
80 80 class lazyparser(object):
81 81 """
82 82 this class avoids the need to parse the entirety of large indices
83 83
84 84 By default we parse and load 1000 entries at a time.
85 85
86 86 If no position is specified, we load the whole index, and replace
87 87 the lazy objects in revlog with the underlying objects for
88 88 efficiency in cases where we look at most of the nodes.
89 89 """
90 90 def __init__(self, data, revlog, indexformat):
91 91 self.data = data
92 92 self.s = struct.calcsize(indexformat)
93 93 self.indexformat = indexformat
94 94 self.l = len(data)/self.s
95 95 self.index = [None] * self.l
96 96 self.map = {nullid: -1}
97 97 self.all = 0
98 98 self.revlog = revlog
99 99
100 100 def load(self, pos=None):
101 101 if self.all: return
102 102 if pos is not None:
103 103 block = pos / 1000
104 104 i = block * 1000
105 105 end = min(self.l, i + 1000)
106 106 else:
107 107 self.all = 1
108 108 i = 0
109 109 end = self.l
110 110 self.revlog.index = self.index
111 111 self.revlog.nodemap = self.map
112 112
113 113 while i < end:
114 114 if not self.index[i]:
115 115 d = self.data[i * self.s: (i + 1) * self.s]
116 116 e = struct.unpack(self.indexformat, d)
117 117 self.index[i] = e
118 118 self.map[e[-1]] = i
119 119 i += 1
120 120
121 121 class lazyindex(object):
122 122 """a lazy version of the index array"""
123 123 def __init__(self, parser):
124 124 self.p = parser
125 125 def __len__(self):
126 126 return len(self.p.index)
127 127 def load(self, pos):
128 128 if pos < 0:
129 129 pos += len(self.p.index)
130 130 self.p.load(pos)
131 131 return self.p.index[pos]
132 132 def __getitem__(self, pos):
133 133 return self.p.index[pos] or self.load(pos)
134 134 def __setitem__(self, pos, item):
135 135 self.p.index[pos] = item
136 136 def __delitem__(self, pos):
137 137 del self.p.index[pos]
138 138 def append(self, e):
139 139 self.p.index.append(e)
140 140
141 141 class lazymap(object):
142 142 """a lazy version of the node map"""
143 143 def __init__(self, parser):
144 144 self.p = parser
145 145 def load(self, key):
146 146 if self.p.all: return
147 147 n = self.p.data.find(key)
148 148 if n < 0:
149 149 raise KeyError(key)
150 150 pos = n / self.p.s
151 151 self.p.load(pos)
152 152 def __contains__(self, key):
153 153 self.p.load()
154 154 return key in self.p.map
155 155 def __iter__(self):
156 156 yield nullid
157 157 for i in xrange(self.p.l):
158 158 try:
159 159 yield self.p.index[i][-1]
160 160 except:
161 161 self.p.load(i)
162 162 yield self.p.index[i][-1]
163 163 def __getitem__(self, key):
164 164 try:
165 165 return self.p.map[key]
166 166 except KeyError:
167 167 try:
168 168 self.load(key)
169 169 return self.p.map[key]
170 170 except KeyError:
171 171 raise KeyError("node " + hex(key))
172 172 def __setitem__(self, key, val):
173 173 self.p.map[key] = val
174 174 def __delitem__(self, key):
175 175 del self.p.map[key]
176 176
177 177 class RevlogError(Exception): pass
178 178
179 179 class revlog(object):
180 180 """
181 181 the underlying revision storage object
182 182
183 183 A revlog consists of two parts, an index and the revision data.
184 184
185 185 The index is a file with a fixed record size containing
186 186 information on each revision, including its nodeid (hash), the
187 187 nodeids of its parents, the position and offset of its data within
188 188 the data file, and the revision it's based on. Finally, each entry
189 189 contains a linkrev entry that can serve as a pointer to external
190 190 data.
191 191
192 192 The revision data itself is a linear collection of data chunks.
193 193 Each chunk represents a revision and is usually represented as a
194 194 delta against the previous chunk. To bound lookup time, runs of
195 195 deltas are limited to about 2 times the length of the original
196 196 version data. This makes retrieval of a version proportional to
197 197 its size, or O(1) relative to the number of revisions.
198 198
199 199 Both pieces of the revlog are written to in an append-only
200 200 fashion, which means we never need to rewrite a file to insert or
201 201 remove data, and can use some simple techniques to avoid the need
202 202 for locking while reading.
203 203 """
204 204 def __init__(self, opener, indexfile, datafile, defversion=0):
205 205 """
206 206 create a revlog object
207 207
208 208 opener is a function that abstracts the file opening operation
209 209 and can be used to implement COW semantics or the like.
210 210 """
211 211 self.indexfile = indexfile
212 212 self.datafile = datafile
213 213 self.opener = opener
214 214
215 215 self.indexstat = None
216 216 self.cache = None
217 217 self.chunkcache = None
218 218 self.defversion = defversion
219 219 self.load()
220 220
221 221 def load(self):
222 222 v = self.defversion
223 223 try:
224 224 f = self.opener(self.indexfile)
225 225 i = f.read()
226 226 except IOError, inst:
227 227 if inst.errno != errno.ENOENT:
228 228 raise
229 229 i = ""
230 230 else:
231 231 try:
232 232 st = os.fstat(f.fileno())
233 233 except AttributeError, inst:
234 234 st = None
235 235 else:
236 236 oldst = self.indexstat
237 237 if (oldst and st.st_dev == oldst.st_dev
238 238 and st.st_ino == oldst.st_ino
239 239 and st.st_mtime == oldst.st_mtime
240 240 and st.st_ctime == oldst.st_ctime):
241 241 return
242 242 self.indexstat = st
243 243 if len(i) > 0:
244 244 v = struct.unpack(versionformat, i[:4])[0]
245 245 flags = v & ~0xFFFF
246 246 fmt = v & 0xFFFF
247 247 if fmt == 0:
248 248 if flags:
249 249 raise RevlogError(_("index %s invalid flags %x for format v0" %
250 250 (self.indexfile, flags)))
251 251 elif fmt == REVLOGNG:
252 252 if flags & ~REVLOGNGINLINEDATA:
253 253 raise RevlogError(_("index %s invalid flags %x for revlogng" %
254 254 (self.indexfile, flags)))
255 255 else:
256 256 raise RevlogError(_("index %s invalid format %d" %
257 257 (self.indexfile, fmt)))
258 258 self.version = v
259 259 if v == 0:
260 260 self.indexformat = indexformatv0
261 261 else:
262 262 self.indexformat = indexformatng
263 263
264 264 if i:
265 265 if not self.inlinedata() and st and st.st_size > 10000:
266 266 # big index, let's parse it on demand
267 267 parser = lazyparser(i, self, self.indexformat)
268 268 self.index = lazyindex(parser)
269 269 self.nodemap = lazymap(parser)
270 270 else:
271 271 self.parseindex(i)
272 272 if self.inlinedata():
273 273 # we've already got the entire data file read in, save it
274 274 # in the chunk data
275 275 self.chunkcache = (0, i)
276 276 if self.version != 0:
277 277 e = list(self.index[0])
278 278 type = self.ngtype(e[0])
279 279 e[0] = self.offset_type(0, type)
280 280 self.index[0] = e
281 281 else:
282 282 self.nodemap = { nullid: -1}
283 283 self.index = []
284 284
285 285
286 286 def parseindex(self, data):
287 287 s = struct.calcsize(self.indexformat)
288 288 l = len(data)
289 289 self.index = []
290 290 self.nodemap = {nullid: -1}
291 291 inline = self.inlinedata()
292 292 off = 0
293 293 n = 0
294 294 while off < l:
295 295 e = struct.unpack(self.indexformat, data[off:off + s])
296 296 self.index.append(e)
297 297 self.nodemap[e[-1]] = n
298 298 n += 1
299 299 off += s
300 300 if inline:
301 301 off += e[1]
302 302
303 303 def ngoffset(self, q):
304 304 if q & 0xFFFF:
305 305 raise RevlogError(_('%s: incompatible revision flag %x') %
306 306 (self.indexfile, q))
307 307 return long(q >> 16)
308 308
309 309 def ngtype(self, q):
310 310 return int(q & 0xFFFF)
311 311
312 312 def offset_type(self, offset, type):
313 313 return long(long(offset) << 16 | type)
314 314
315 315 def loadindexmap(self):
316 316 """loads both the map and the index from the lazy parser"""
317 317 if isinstance(self.index, lazyindex):
318 318 p = self.index.p
319 319 p.load()
320 320
321 321 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
322 322 def tip(self): return self.node(len(self.index) - 1)
323 323 def count(self): return len(self.index)
324 324 def node(self, rev):
325 325 return (rev < 0) and nullid or self.index[rev][-1]
326 326 def rev(self, node):
327 327 try:
328 328 return self.nodemap[node]
329 329 except KeyError:
330 330 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
331 331 def linkrev(self, node): return self.index[self.rev(node)][-4]
332 332 def parents(self, node):
333 333 if node == nullid: return (nullid, nullid)
334 334 r = self.rev(node)
335 335 d = self.index[r][-3:-1]
336 336 if self.version == 0:
337 337 return d
338 338 return [ self.node(x) for x in d ]
339 339 def start(self, rev):
340 340 if rev < 0:
341 341 return -1
342 342 if self.version != 0:
343 343 return self.ngoffset(self.index[rev][0])
344 344 return self.index[rev][0]
345
345 346 def end(self, rev): return self.start(rev) + self.length(rev)
346 347
348 def size(self, rev):
349 """return the length of the uncompressed text for a given revision"""
350 l = -1
351 if self.version != 0:
352 l = self.index[rev][2]
353 if l >= 0:
354 return l
355
356 t = self.revision(self.node(rev))
357 return len(t)
358
359 # alternate implementation. The advantage of this code is that it
360 # will be faster for a single revision, but the results are not
361 # cached, so finding the size of every revision will be slower.
362 """
363 if self.cache and self.cache[1] == rev:
364 return len(self.cache[2])
365
366 base = self.base(rev)
367 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
368 base = self.cache[1]
369 text = self.cache[2]
370 else:
371 text = self.revision(self.node(base))
372
373 l = len(text)
374 for x in xrange(base + 1, rev + 1):
375 l = mdiff.patchedsize(l, self.chunk(x))
376 return l
377 """
378
347 379 def length(self, rev):
348 380 if rev < 0:
349 381 return 0
350 382 else:
351 383 return self.index[rev][1]
352 384 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
353 385
354 386 def reachable(self, rev, stop=None):
355 387 reachable = {}
356 388 visit = [rev]
357 389 reachable[rev] = 1
358 390 if stop:
359 391 stopn = self.rev(stop)
360 392 else:
361 393 stopn = 0
362 394 while visit:
363 395 n = visit.pop(0)
364 396 if n == stop:
365 397 continue
366 398 if n == nullid:
367 399 continue
368 400 for p in self.parents(n):
369 401 if self.rev(p) < stopn:
370 402 continue
371 403 if p not in reachable:
372 404 reachable[p] = 1
373 405 visit.append(p)
374 406 return reachable
375 407
376 408 def nodesbetween(self, roots=None, heads=None):
377 409 """Return a tuple containing three elements. Elements 1 and 2 contain
378 410 a final list of bases and heads after all the unreachable ones have been
379 411 pruned. Element 0 contains a topologically sorted list of all
380 412
381 413 nodes that satisfy these constraints:
382 414 1. All nodes must be descended from a node in roots (the nodes on
383 415 roots are considered descended from themselves).
384 416 2. All nodes must also be ancestors of a node in heads (the nodes in
385 417 heads are considered to be their own ancestors).
386 418
387 419 If roots is unspecified, nullid is assumed as the only root.
388 420 If heads is unspecified, it is taken to be the output of the
389 421 heads method (i.e. a list of all nodes in the repository that
390 422 have no children)."""
391 423 nonodes = ([], [], [])
392 424 if roots is not None:
393 425 roots = list(roots)
394 426 if not roots:
395 427 return nonodes
396 428 lowestrev = min([self.rev(n) for n in roots])
397 429 else:
398 430 roots = [nullid] # Everybody's a descendent of nullid
399 431 lowestrev = -1
400 432 if (lowestrev == -1) and (heads is None):
401 433 # We want _all_ the nodes!
402 434 return ([self.node(r) for r in xrange(0, self.count())],
403 435 [nullid], list(self.heads()))
404 436 if heads is None:
405 437 # All nodes are ancestors, so the latest ancestor is the last
406 438 # node.
407 439 highestrev = self.count() - 1
408 440 # Set ancestors to None to signal that every node is an ancestor.
409 441 ancestors = None
410 442 # Set heads to an empty dictionary for later discovery of heads
411 443 heads = {}
412 444 else:
413 445 heads = list(heads)
414 446 if not heads:
415 447 return nonodes
416 448 ancestors = {}
417 449 # Start at the top and keep marking parents until we're done.
418 450 nodestotag = heads[:]
419 451 # Turn heads into a dictionary so we can remove 'fake' heads.
420 452 # Also, later we will be using it to filter out the heads we can't
421 453 # find from roots.
422 454 heads = dict.fromkeys(heads, 0)
423 455 # Remember where the top was so we can use it as a limit later.
424 456 highestrev = max([self.rev(n) for n in nodestotag])
425 457 while nodestotag:
426 458 # grab a node to tag
427 459 n = nodestotag.pop()
428 460 # Never tag nullid
429 461 if n == nullid:
430 462 continue
431 463 # A node's revision number represents its place in a
432 464 # topologically sorted list of nodes.
433 465 r = self.rev(n)
434 466 if r >= lowestrev:
435 467 if n not in ancestors:
436 468 # If we are possibly a descendent of one of the roots
437 469 # and we haven't already been marked as an ancestor
438 470 ancestors[n] = 1 # Mark as ancestor
439 471 # Add non-nullid parents to list of nodes to tag.
440 472 nodestotag.extend([p for p in self.parents(n) if
441 473 p != nullid])
442 474 elif n in heads: # We've seen it before, is it a fake head?
443 475 # So it is, real heads should not be the ancestors of
444 476 # any other heads.
445 477 heads.pop(n)
446 478 if not ancestors:
447 479 return nonodes
448 480 # Now that we have our set of ancestors, we want to remove any
449 481 # roots that are not ancestors.
450 482
451 483 # If one of the roots was nullid, everything is included anyway.
452 484 if lowestrev > -1:
453 485 # But, since we weren't, let's recompute the lowest rev to not
454 486 # include roots that aren't ancestors.
455 487
456 488 # Filter out roots that aren't ancestors of heads
457 489 roots = [n for n in roots if n in ancestors]
458 490 # Recompute the lowest revision
459 491 if roots:
460 492 lowestrev = min([self.rev(n) for n in roots])
461 493 else:
462 494 # No more roots? Return empty list
463 495 return nonodes
464 496 else:
465 497 # We are descending from nullid, and don't need to care about
466 498 # any other roots.
467 499 lowestrev = -1
468 500 roots = [nullid]
469 501 # Transform our roots list into a 'set' (i.e. a dictionary where the
470 502 # values don't matter).
471 503 descendents = dict.fromkeys(roots, 1)
472 504 # Also, keep the original roots so we can filter out roots that aren't
473 505 # 'real' roots (i.e. are descended from other roots).
474 506 roots = descendents.copy()
475 507 # Our topologically sorted list of output nodes.
476 508 orderedout = []
477 509 # Don't start at nullid since we don't want nullid in our output list,
478 510 # and if nullid shows up in descendents, empty parents will look like
479 511 # they're descendents.
480 512 for r in xrange(max(lowestrev, 0), highestrev + 1):
481 513 n = self.node(r)
482 514 isdescendent = False
483 515 if lowestrev == -1: # Everybody is a descendent of nullid
484 516 isdescendent = True
485 517 elif n in descendents:
486 518 # n is already a descendent
487 519 isdescendent = True
488 520 # This check only needs to be done here because all the roots
489 521 # will start being marked as descendents before the loop.
490 522 if n in roots:
491 523 # If n was a root, check if it's a 'real' root.
492 524 p = tuple(self.parents(n))
493 525 # If any of its parents are descendents, it's not a root.
494 526 if (p[0] in descendents) or (p[1] in descendents):
495 527 roots.pop(n)
496 528 else:
497 529 p = tuple(self.parents(n))
498 530 # A node is a descendent if either of its parents are
499 531 # descendents. (We seeded the descendents list with the roots
500 532 # up there, remember?)
501 533 if (p[0] in descendents) or (p[1] in descendents):
502 534 descendents[n] = 1
503 535 isdescendent = True
504 536 if isdescendent and ((ancestors is None) or (n in ancestors)):
505 537 # Only include nodes that are both descendents and ancestors.
506 538 orderedout.append(n)
507 539 if (ancestors is not None) and (n in heads):
508 540 # We're trying to figure out which heads are reachable
509 541 # from roots.
510 542 # Mark this head as having been reached
511 543 heads[n] = 1
512 544 elif ancestors is None:
513 545 # Otherwise, we're trying to discover the heads.
514 546 # Assume this is a head because if it isn't, the next step
515 547 # will eventually remove it.
516 548 heads[n] = 1
517 549 # But, obviously its parents aren't.
518 550 for p in self.parents(n):
519 551 heads.pop(p, None)
520 552 heads = [n for n in heads.iterkeys() if heads[n] != 0]
521 553 roots = roots.keys()
522 554 assert orderedout
523 555 assert roots
524 556 assert heads
525 557 return (orderedout, roots, heads)
526 558
527 559 def heads(self, start=None):
528 560 """return the list of all nodes that have no children
529 561
530 562 if start is specified, only heads that are descendants of
531 563 start will be returned
532 564
533 565 """
534 566 if start is None:
535 567 start = nullid
536 568 reachable = {start: 1}
537 569 heads = {start: 1}
538 570 startrev = self.rev(start)
539 571
540 572 for r in xrange(startrev + 1, self.count()):
541 573 n = self.node(r)
542 574 for pn in self.parents(n):
543 575 if pn in reachable:
544 576 reachable[n] = 1
545 577 heads[n] = 1
546 578 if pn in heads:
547 579 del heads[pn]
548 580 return heads.keys()
549 581
550 582 def children(self, node):
551 583 """find the children of a given node"""
552 584 c = []
553 585 p = self.rev(node)
554 586 for r in range(p + 1, self.count()):
555 587 n = self.node(r)
556 588 for pn in self.parents(n):
557 589 if pn == node:
558 590 c.append(n)
559 591 continue
560 592 elif pn == nullid:
561 593 continue
562 594 return c
563 595
564 596 def lookup(self, id):
565 597 """locate a node based on revision number or subset of hex nodeid"""
566 598 try:
567 599 rev = int(id)
568 600 if str(rev) != id: raise ValueError
569 601 if rev < 0: rev = self.count() + rev
570 602 if rev < 0 or rev >= self.count(): raise ValueError
571 603 return self.node(rev)
572 604 except (ValueError, OverflowError):
573 605 c = []
574 606 for n in self.nodemap:
575 607 if hex(n).startswith(id):
576 608 c.append(n)
577 609 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
578 610 if len(c) < 1: raise RevlogError(_("No match found"))
579 611 return c[0]
580 612
581 613 return None
582 614
583 615 def diff(self, a, b):
584 616 """return a delta between two revisions"""
585 617 return mdiff.textdiff(a, b)
586 618
587 619 def patches(self, t, pl):
588 620 """apply a list of patches to a string"""
589 621 return mdiff.patches(t, pl)
590 622
591 623 def chunk(self, rev, df=None, cachelen=4096):
592 624 start, length = self.start(rev), self.length(rev)
593 625 inline = self.inlinedata()
594 626 if inline:
595 627 start += (rev + 1) * struct.calcsize(self.indexformat)
596 628 end = start + length
597 629 def loadcache(df):
598 630 cache_length = max(cachelen, length) # 4k
599 631 if not df:
600 632 if inline:
601 633 df = self.opener(self.indexfile)
602 634 else:
603 635 df = self.opener(self.datafile)
604 636 df.seek(start)
605 637 self.chunkcache = (start, df.read(cache_length))
606 638
607 639 if not self.chunkcache:
608 640 loadcache(df)
609 641
610 642 cache_start = self.chunkcache[0]
611 643 cache_end = cache_start + len(self.chunkcache[1])
612 644 if start >= cache_start and end <= cache_end:
613 645 # it is cached
614 646 offset = start - cache_start
615 647 else:
616 648 loadcache(df)
617 649 offset = 0
618 650
619 651 #def checkchunk():
620 652 # df = self.opener(self.datafile)
621 653 # df.seek(start)
622 654 # return df.read(length)
623 655 #assert s == checkchunk()
624 656 return decompress(self.chunkcache[1][offset:offset + length])
625 657
626 658 def delta(self, node):
627 659 """return or calculate a delta between a node and its predecessor"""
628 660 r = self.rev(node)
629 661 return self.revdiff(r - 1, r)
630 662
631 663 def revdiff(self, rev1, rev2):
632 664 """return or calculate a delta between two revisions"""
633 665 b1 = self.base(rev1)
634 666 b2 = self.base(rev2)
635 667 if b1 == b2 and rev1 + 1 == rev2:
636 668 return self.chunk(rev2)
637 669 else:
638 670 return self.diff(self.revision(self.node(rev1)),
639 671 self.revision(self.node(rev2)))
640 672
641 673 def revision(self, node):
642 674 """return an uncompressed revision of a given"""
643 675 if node == nullid: return ""
644 676 if self.cache and self.cache[0] == node: return self.cache[2]
645 677
646 678 # look up what we need to read
647 679 text = None
648 680 rev = self.rev(node)
649 681 base = self.base(rev)
650 682
651 683 if self.inlinedata():
652 684 # we probably have the whole chunk cached
653 685 df = None
654 686 else:
655 687 df = self.opener(self.datafile)
656 688
657 689 # do we have useful data cached?
658 690 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
659 691 base = self.cache[1]
660 692 text = self.cache[2]
661 693 else:
662 694 text = self.chunk(base, df=df)
663 695
664 696 bins = []
665 697 for r in xrange(base + 1, rev + 1):
666 698 bins.append(self.chunk(r, df=df))
667 699
668 700 text = self.patches(text, bins)
669 701
670 702 p1, p2 = self.parents(node)
671 703 if node != hash(text, p1, p2):
672 704 raise RevlogError(_("integrity check failed on %s:%d")
673 705 % (self.datafile, rev))
674 706
675 707 self.cache = (node, rev, text)
676 708 return text
677 709
678 710 def checkinlinesize(self, tr, fp=None):
679 711 if not self.inlinedata():
680 712 return
681 713 if not fp:
682 714 fp = self.opener(self.indexfile, 'r')
683 715 size = fp.tell()
684 716 if size < 131072:
685 717 return
686 718 tr.add(self.datafile, 0)
687 719 df = self.opener(self.datafile, 'w')
688 720 calc = struct.calcsize(self.indexformat)
689 721 for r in xrange(self.count()):
690 722 start = self.start(r) + (r + 1) * calc
691 723 length = self.length(r)
692 724 fp.seek(start)
693 725 d = fp.read(length)
694 726 df.write(d)
695 727 fp.close()
696 728 df.close()
697 729 fp = self.opener(self.indexfile, 'w', atomictemp=True)
698 730 self.version &= ~(REVLOGNGINLINEDATA)
699 731 if self.count():
700 732 x = self.index[0]
701 733 e = struct.pack(self.indexformat, *x)[4:]
702 734 l = struct.pack(versionformat, self.version)
703 735 fp.write(l)
704 736 fp.write(e)
705 737
706 738 for i in xrange(1, self.count()):
707 739 x = self.index[i]
708 740 e = struct.pack(self.indexformat, *x)
709 741 fp.write(e)
710 742
711 743 # if we don't call rename, the temp file will never replace the
712 744 # real index
713 745 fp.rename()
714 746 self.chunkcache = None
715 747
716 748 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
717 749 """add a revision to the log
718 750
719 751 text - the revision data to add
720 752 transaction - the transaction object used for rollback
721 753 link - the linkrev data to add
722 754 p1, p2 - the parent nodeids of the revision
723 755 d - an optional precomputed delta
724 756 """
725 757 if text is None: text = ""
726 758 if p1 is None: p1 = self.tip()
727 759 if p2 is None: p2 = nullid
728 760
729 761 node = hash(text, p1, p2)
730 762
731 763 if node in self.nodemap:
732 764 return node
733 765
734 766 n = self.count()
735 767 t = n - 1
736 768
737 769 if n:
738 770 base = self.base(t)
739 771 start = self.start(base)
740 772 end = self.end(t)
741 773 if not d:
742 774 prev = self.revision(self.tip())
743 775 d = self.diff(prev, str(text))
744 776 data = compress(d)
745 777 l = len(data[1]) + len(data[0])
746 778 dist = end - start + l
747 779
748 780 # full versions are inserted when the needed deltas
749 781 # become comparable to the uncompressed text
750 782 if not n or dist > len(text) * 2:
751 783 data = compress(text)
752 784 l = len(data[1]) + len(data[0])
753 785 base = n
754 786 else:
755 787 base = self.base(t)
756 788
757 789 offset = 0
758 790 if t >= 0:
759 791 offset = self.end(t)
760 792
761 793 if self.version == 0:
762 794 e = (offset, l, base, link, p1, p2, node)
763 795 else:
764 796 e = (self.offset_type(offset, 0), l, len(text),
765 797 base, link, self.rev(p1), self.rev(p2), node)
766 798
767 799 self.index.append(e)
768 800 self.nodemap[node] = n
769 801 entry = struct.pack(self.indexformat, *e)
770 802
771 803 if not self.inlinedata():
772 804 transaction.add(self.datafile, offset)
773 805 transaction.add(self.indexfile, n * len(entry))
774 806 f = self.opener(self.datafile, "a")
775 807 if data[0]:
776 808 f.write(data[0])
777 809 f.write(data[1])
778 810 f = self.opener(self.indexfile, "a")
779 811 else:
780 812 f = self.opener(self.indexfile, "a+")
781 813 f.seek(0, 2)
782 814 transaction.add(self.indexfile, f.tell())
783 815
784 816 if len(self.index) == 1 and self.version != 0:
785 817 l = struct.pack(versionformat, self.version)
786 818 f.write(l)
787 819 entry = entry[4:]
788 820
789 821 f.write(entry)
790 822
791 823 if self.inlinedata():
792 824 f.write(data[0])
793 825 f.write(data[1])
794 826 self.checkinlinesize(transaction, f)
795 827
796 828 self.cache = (node, n, text)
797 829 return node
798 830
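The full-version test in addrevision() is the delta-chain bound promised by the class docstring: once reading the base revision plus every delta up to the new one would cost more than about twice the new text, a full version is stored and the chain restarts. As a standalone sketch, with argument names standing in for the locals above:

    def storefull(chainstart, chainend, deltalen, textlen):
        # bytes that must be read to reconstruct this revision:
        # the base version plus every delta up to and including ours
        dist = chainend - chainstart + deltalen
        return dist > textlen * 2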
799 831 def ancestor(self, a, b):
800 832 """calculate the least common ancestor of nodes a and b"""
801 833 # calculate the distance of every node from root
802 834 dist = {nullid: 0}
803 835 for i in xrange(self.count()):
804 836 n = self.node(i)
805 837 p1, p2 = self.parents(n)
806 838 dist[n] = max(dist[p1], dist[p2]) + 1
807 839
808 840 # traverse ancestors in order of decreasing distance from root
809 841 def ancestors(node):
810 842 # we store negative distances because heap returns smallest member
811 843 h = [(-dist[node], node)]
812 844 seen = {}
813 845 while h:
814 846 d, n = heapq.heappop(h)
815 847 if n not in seen:
816 848 seen[n] = 1
817 849 yield (-d, n)
818 850 for p in self.parents(n):
819 851 heapq.heappush(h, (-dist[p], p))
820 852
821 853 def generations(node):
822 854 sg, s = None, {}
823 855 for g,n in ancestors(node):
824 856 if g != sg:
825 857 if sg:
826 858 yield sg, s
827 859 sg, s = g, {n:1}
828 860 else:
829 861 s[n] = 1
830 862 yield sg, s
831 863
832 864 x = generations(a)
833 865 y = generations(b)
834 866 gx = x.next()
835 867 gy = y.next()
836 868
837 869 # increment each ancestor list until it is closer to root than
838 870 # the other, or they match
839 871 while 1:
840 872 #print "ancestor gen %s %s" % (gx[0], gy[0])
841 873 if gx[0] == gy[0]:
842 874 # find the intersection
843 875 i = [ n for n in gx[1] if n in gy[1] ]
844 876 if i:
845 877 return i[0]
846 878 else:
847 879 #print "next"
848 880 gy = y.next()
849 881 gx = x.next()
850 882 elif gx[0] < gy[0]:
851 883 #print "next y"
852 884 gy = y.next()
853 885 else:
854 886 #print "next x"
855 887 gx = x.next()
856 888
857 889 def group(self, nodelist, lookup, infocollect=None):
858 890 """calculate a delta group
859 891
860 892 Given a list of changeset revs, return a set of deltas and
861 893 metadata corresponding to nodes. the first delta is
862 894 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
863 895 have this parent as it has all history before these
864 896 changesets. parent is parent[0]
865 897 """
866 898 revs = [self.rev(n) for n in nodelist]
867 899
868 900 # if we don't have any revisions touched by these changesets, bail
869 901 if not revs:
870 902 yield changegroup.closechunk()
871 903 return
872 904
873 905 # add the parent of the first rev
874 906 p = self.parents(self.node(revs[0]))[0]
875 907 revs.insert(0, self.rev(p))
876 908
877 909 # build deltas
878 910 for d in xrange(0, len(revs) - 1):
879 911 a, b = revs[d], revs[d + 1]
880 912 nb = self.node(b)
881 913
882 914 if infocollect is not None:
883 915 infocollect(nb)
884 916
885 917 d = self.revdiff(a, b)
886 918 p = self.parents(nb)
887 919 meta = nb + p[0] + p[1] + lookup(nb)
888 920 yield changegroup.genchunk("%s%s" % (meta, d))
889 921
890 922 yield changegroup.closechunk()
891 923
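Each chunk group() emits carries an 80-byte header of four nodeids (the revision, its two parents, and the changeset it belongs to via lookup()), followed by the delta against the previous revision in the group; addgroup() below unpacks it the same way. A consumer-side sketch:

    import struct

    def parsechunk(chunk):
        # 80-byte header, then the binary delta
        node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
        return node, p1, p2, cs, chunk[80:]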
892 924 def addgroup(self, revs, linkmapper, transaction, unique=0):
893 925 """
894 926 add a delta group
895 927
896 928 given a set of deltas, add them to the revision log. the
897 929 first delta is against its parent, which should be in our
898 930 log, the rest are against the previous delta.
899 931 """
900 932
901 933 #track the base of the current delta log
902 934 r = self.count()
903 935 t = r - 1
904 936 node = None
905 937
906 938 base = prev = -1
907 start = end = measure = 0
939 start = end = textlen = 0
908 940 if r:
909 941 end = self.end(t)
910 942
911 943 ifh = self.opener(self.indexfile, "a+")
912 944 ifh.seek(0, 2)
913 945 transaction.add(self.indexfile, ifh.tell())
914 946 if self.inlinedata():
915 947 dfh = None
916 948 else:
917 949 transaction.add(self.datafile, end)
918 950 dfh = self.opener(self.datafile, "a")
919 951
920 952 # loop through our set of deltas
921 953 chain = None
922 954 for chunk in revs:
923 955 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
924 956 link = linkmapper(cs)
925 957 if node in self.nodemap:
926 958 # this can happen if two branches make the same change
927 959 # if unique:
928 960 # raise RevlogError(_("already have %s") % hex(node[:4]))
929 961 chain = node
930 962 continue
931 963 delta = chunk[80:]
932 964
933 965 for p in (p1, p2):
934 966 if not p in self.nodemap:
935 967 raise RevlogError(_("unknown parent %s") % short(p1))
936 968
937 969 if not chain:
938 970 # retrieve the parent revision of the delta chain
939 971 chain = p1
940 972 if not chain in self.nodemap:
941 973 raise RevlogError(_("unknown base %s") % short(chain[:4]))
942 974
943 975 # full versions are inserted when the needed deltas become
944 976 # comparable to the uncompressed text or when the previous
945 977 # version is not the one we have a delta against. We use
946 978 # the size of the previous full rev as a proxy for the
947 979 # current size.
948 980
949 981 if chain == prev:
950 982 tempd = compress(delta)
951 983 cdelta = tempd[0] + tempd[1]
984 textlen = mdiff.patchedsize(textlen, delta)
952 985
953 if chain != prev or (end - start + len(cdelta)) > measure * 2:
986 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
954 987 # flush our writes here so we can read it in revision
955 988 if dfh:
956 989 dfh.flush()
957 990 ifh.flush()
958 991 text = self.revision(chain)
959 992 text = self.patches(text, [delta])
960 993 chk = self.addrevision(text, transaction, link, p1, p2)
961 994 if chk != node:
962 995 raise RevlogError(_("consistency error adding group"))
963 measure = len(text)
996 textlen = len(text)
964 997 else:
965 998 if self.version == 0:
966 999 e = (end, len(cdelta), base, link, p1, p2, node)
967 1000 else:
968 e = (self.offset_type(end, 0), len(cdelta), -1, base,
1001 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
969 1002 link, self.rev(p1), self.rev(p2), node)
970 1003 self.index.append(e)
971 1004 self.nodemap[node] = r
972 1005 if self.inlinedata():
973 1006 ifh.write(struct.pack(self.indexformat, *e))
974 1007 ifh.write(cdelta)
975 1008 self.checkinlinesize(transaction, ifh)
976 1009 if not self.inlinedata():
977 1010 dfh = self.opener(self.datafile, "a")
978 1011 ifh = self.opener(self.indexfile, "a")
979 1012 else:
980 1013 if not dfh:
981 1014 # addrevision switched from inline to conventional
982 1015 # reopen the index
983 1016 dfh = self.opener(self.datafile, "a")
984 1017 ifh = self.opener(self.indexfile, "a")
985 1018 dfh.write(cdelta)
986 1019 ifh.write(struct.pack(self.indexformat, *e))
987 1020
988 1021 t, r, chain, prev = r, r + 1, node, node
989 1022 base = self.base(t)
990 1023 start = self.start(base)
991 1024 end = self.end(t)
992 1025
993 1026 if node is None:
994 1027 raise RevlogError(_("group to be added is empty"))
995 1028 return node
996 1029
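This loop is where the commit earns its title: while a chunk extends the current chain, textlen is kept current with mdiff.patchedsize() and written into the index entry where -1 used to go, and the full-version test now compares against the actual uncompressed size rather than the stale `measure` of the last full text. A condensed, standalone sketch of the per-chunk bookkeeping, with arguments standing in for the loop's locals:

    import mdiff

    def addchunksize(textlen, delta, extendschain, chainspan, cdeltalen):
        if extendschain:
            # cheap: size after applying the delta, no text materialized
            textlen = mdiff.patchedsize(textlen, delta)
        # store a full version if the chain broke or outgrew 2x the text
        storefull = (not extendschain) or chainspan + cdeltalen > textlen * 2
        return textlen, storefull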
997 1030 def strip(self, rev, minlink):
998 1031 if self.count() == 0 or rev >= self.count():
999 1032 return
1000 1033
1001 1034 if isinstance(self.index, lazyindex):
1002 1035 self.loadindexmap()
1003 1036
1004 1037 # When stripping away a revision, we need to make sure it
1005 1038 # does not actually belong to an older changeset.
1006 1039 # The minlink parameter defines the oldest revision
1007 1040 # we're allowed to strip away.
1008 1041 while minlink > self.index[rev][-4]:
1009 1042 rev += 1
1010 1043 if rev >= self.count():
1011 1044 return
1012 1045
1013 1046 # first truncate the files on disk
1014 1047 end = self.start(rev)
1015 1048 if not self.inlinedata():
1016 1049 df = self.opener(self.datafile, "a")
1017 1050 df.truncate(end)
1018 1051 end = rev * struct.calcsize(self.indexformat)
1019 1052 else:
1020 1053 end += rev * struct.calcsize(self.indexformat)
1021 1054
1022 1055 indexf = self.opener(self.indexfile, "a")
1023 1056 indexf.truncate(end)
1024 1057
1025 1058 # then reset internal state in memory to forget those revisions
1026 1059 self.cache = None
1027 1060 self.chunkcache = None
1028 1061 for x in xrange(rev, self.count()):
1029 1062 del self.nodemap[self.node(x)]
1030 1063
1031 1064 del self.index[rev:]
1032 1065
1033 1066 def checksize(self):
1034 1067 expected = 0
1035 1068 if self.count():
1036 1069 expected = self.end(self.count() - 1)
1037 1070
1038 1071 try:
1039 1072 f = self.opener(self.datafile)
1040 1073 f.seek(0, 2)
1041 1074 actual = f.tell()
1042 1075 dd = actual - expected
1043 1076 except IOError, inst:
1044 1077 if inst.errno != errno.ENOENT:
1045 1078 raise
1046 1079 dd = 0
1047 1080
1048 1081 try:
1049 1082 f = self.opener(self.indexfile)
1050 1083 f.seek(0, 2)
1051 1084 actual = f.tell()
1052 1085 s = struct.calcsize(self.indexformat)
1053 1086 i = actual / s
1054 1087 di = actual - (i * s)
1055 1088 if self.inlinedata():
1056 1089 databytes = 0
1057 1090 for r in xrange(self.count()):
1058 1091 databytes += self.length(r)
1059 1092 dd = 0
1060 1093 di = actual - self.count() * s - databytes
1061 1094 except IOError, inst:
1062 1095 if inst.errno != errno.ENOENT:
1063 1096 raise
1064 1097 di = 0
1065 1098
1066 1099 return (dd, di)
1067 1100
1068 1101