Teach bdiff to support buffer objects...
Brendan Cully
r3335:9061613c default
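The key change in bdiff.c below is to bdiff()'s argument parsing: the "SS:bdiff" format (PyString objects only) becomes "t#t#:bdiff", which accepts any object exposing the read-only buffer interface and hands the C code a raw pointer plus length. A minimal usage sketch of the new calling convention (Python 2; buffer is the builtin buffer type, and note that blocks() keeps its string-only "SS" signature):

    import bdiff

    a = buffer("line one\nline two\n")  # read-only buffer, no string copy
    b = "line one\nline 2\n"
    patch = bdiff.bdiff(a, b)           # now accepts buffers as well as strings
    matches = bdiff.blocks(str(a), b)   # blocks() still requires plain strings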
@@ -1,371 +1,373
1 1 /*
2 2 bdiff.c - efficient binary diff extension for Mercurial
3 3
4 4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Based roughly on Python difflib
10 10 */
11 11
12 12 #include <Python.h>
13 13 #include <stdlib.h>
14 14 #include <string.h>
15 15
16 16 #ifdef __hpux
17 17 #define inline
18 18 #endif
19 19
20 20 #ifdef __SUNPRO_C
21 21 # define inline
22 22 #endif
23 23
24 24 #ifdef _WIN32
25 25 #ifdef _MSC_VER
26 26 #define inline __inline
27 27 typedef unsigned long uint32_t;
28 28 #else
29 29 #include <stdint.h>
30 30 #endif
31 31 static uint32_t htonl(uint32_t x)
32 32 {
33 33 return ((x & 0x000000ffUL) << 24) |
34 34 ((x & 0x0000ff00UL) << 8) |
35 35 ((x & 0x00ff0000UL) >> 8) |
36 36 ((x & 0xff000000UL) >> 24);
37 37 }
38 38 #else
39 39 #include <sys/types.h>
40 40 #include <arpa/inet.h>
41 41 #include <inttypes.h>
42 42 #endif
43 43
44 44 struct line {
45 45 int h, len, n, e;
46 46 const char *l;
47 47 };
48 48
49 49 struct pos {
50 50 int pos, len;
51 51 };
52 52
53 53 struct hunk {
54 54 int a1, a2, b1, b2;
55 55 };
56 56
57 57 struct hunklist {
58 58 struct hunk *base, *head;
59 59 };
60 60
61 61 static inline uint32_t rol32(uint32_t word, unsigned int shift)
62 62 {
63 63 return (word << shift) | (word >> (32 - shift));
64 64 }
65 65
66 66 int splitlines(const char *a, int len, struct line **lr)
67 67 {
68 68 int g, h, i;
69 69 const char *p, *b = a;
70 70 struct line *l;
71 71
72 72 /* count the lines */
73 73 i = 1; /* extra line for sentinel */
74 74 for (p = a; p < a + len; p++)
75 75 if (*p == '\n' || p == a + len - 1)
76 76 i++;
77 77
78 78 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
79 79 if (!l)
80 80 return -1;
81 81
82 82 /* build the line array and calculate hashes */
83 83 h = 0;
84 84 for (p = a; p < a + len; p++) {
85 85 /*
86 86 * a simple hash from GNU diff, with better collision
87 87 * resistance from hashpjw. this slows down common
88 88 * case by 10%, but speeds up worst case by 100x.
89 89 */
90 90 h = *p + rol32(h, 7);
91 91 if ((g = h & 0xf0000000)) {
92 92 h ^= g >> 24;
93 93 h ^= g;
94 94 }
95 95 if (*p == '\n' || p == a + len - 1) {
96 96 l->len = p - b + 1;
97 97 l->h = h * l->len;
98 98 l->l = b;
99 99 l->n = -1;
100 100 l++;
101 101 b = p + 1;
102 102 h = 0;
103 103 }
104 104 }
105 105
106 106 /* set up a sentinel */
107 107 l->h = l->len = 0;
108 108 l->l = a + len;
109 109 return i - 1;
110 110 }
111 111
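For reference, the per-line hash computed above (GNU diff's rotate-and-add hash with a hashpjw-style fold of the top nibble, scaled by line length at the end) can be sketched in Python; this is purely illustrative, mirroring the C arithmetic modulo 2**32:

    def line_hash(line):
        h = 0
        for ch in line:
            rol7 = ((h << 7) | (h >> 25)) & 0xffffffff  # rol32(h, 7)
            h = (ord(ch) + rol7) & 0xffffffff
            g = h & 0xf0000000
            if g:
                h ^= (g >> 24) ^ g  # fold the high bits back in (hashpjw)
        return (h * len(line)) & 0xffffffff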
112 112 int inline cmp(struct line *a, struct line *b)
113 113 {
114 114 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
115 115 }
116 116
117 117 static int equatelines(struct line *a, int an, struct line *b, int bn)
118 118 {
119 119 int i, j, buckets = 1, t;
120 120 struct pos *h;
121 121
122 122 /* build a hash table of the next highest power of 2 */
123 123 while (buckets < bn + 1)
124 124 buckets *= 2;
125 125
126 126 h = (struct pos *)malloc(buckets * sizeof(struct pos));
127 127 buckets = buckets - 1;
128 128 if (!h)
129 129 return 0;
130 130
131 131 /* clear the hash table */
132 132 for (i = 0; i <= buckets; i++) {
133 133 h[i].pos = -1;
134 134 h[i].len = 0;
135 135 }
136 136
137 137 /* add lines to the hash table chains */
138 138 for (i = bn - 1; i >= 0; i--) {
139 139 /* find the equivalence class */
140 140 for (j = b[i].h & buckets; h[j].pos != -1;
141 141 j = (j + 1) & buckets)
142 142 if (!cmp(b + i, b + h[j].pos))
143 143 break;
144 144
145 145 /* add to the head of the equivalence class */
146 146 b[i].n = h[j].pos;
147 147 b[i].e = j;
148 148 h[j].pos = i;
149 149 h[j].len++; /* keep track of popularity */
150 150 }
151 151
152 152 /* compute popularity threshold */
153 153 t = (bn >= 200) ? bn / 100 : bn + 1;
154 154
155 155 /* match items in a to their equivalence class in b */
156 156 for (i = 0; i < an; i++) {
157 157 /* find the equivalence class */
158 158 for (j = a[i].h & buckets; h[j].pos != -1;
159 159 j = (j + 1) & buckets)
160 160 if (!cmp(a + i, b + h[j].pos))
161 161 break;
162 162
163 163 a[i].e = j; /* use equivalence class for quick compare */
164 164 if (h[j].len <= t)
165 165 a[i].n = h[j].pos; /* point to head of match list */
166 166 else
167 167 a[i].n = -1; /* too popular */
168 168 }
169 169
170 170 /* discard hash tables */
171 171 free(h);
172 172 return 1;
173 173 }
174 174
175 175 static int longest_match(struct line *a, struct line *b, struct pos *pos,
176 176 int a1, int a2, int b1, int b2, int *omi, int *omj)
177 177 {
178 178 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
179 179
180 180 for (i = a1; i < a2; i++) {
181 181 /* skip things before the current block */
182 182 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
183 183 ;
184 184
185 185 /* loop through all lines matching a[i] in b */
186 186 for (; j != -1 && j < b2; j = b[j].n) {
187 187 /* does this extend an earlier match? */
188 188 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
189 189 k = pos[j - 1].len + 1;
190 190 else
191 191 k = 1;
192 192 pos[j].pos = i;
193 193 pos[j].len = k;
194 194
195 195 /* best match so far? */
196 196 if (k > mk) {
197 197 mi = i;
198 198 mj = j;
199 199 mk = k;
200 200 }
201 201 }
202 202 }
203 203
204 204 if (mk) {
205 205 mi = mi - mk + 1;
206 206 mj = mj - mk + 1;
207 207 }
208 208
209 209 /* expand match to include neighboring popular lines */
210 210 while (mi - mb > a1 && mj - mb > b1 &&
211 211 a[mi - mb - 1].e == b[mj - mb - 1].e)
212 212 mb++;
213 213 while (mi + mk < a2 && mj + mk < b2 &&
214 214 a[mi + mk].e == b[mj + mk].e)
215 215 mk++;
216 216
217 217 *omi = mi - mb;
218 218 *omj = mj - mb;
219 219 return mk + mb;
220 220 }
221 221
222 222 static void recurse(struct line *a, struct line *b, struct pos *pos,
223 223 int a1, int a2, int b1, int b2, struct hunklist *l)
224 224 {
225 225 int i, j, k;
226 226
227 227 /* find the longest match in this chunk */
228 228 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
229 229 if (!k)
230 230 return;
231 231
232 232 /* and recurse on the remaining chunks on either side */
233 233 recurse(a, b, pos, a1, i, b1, j, l);
234 234 l->head->a1 = i;
235 235 l->head->a2 = i + k;
236 236 l->head->b1 = j;
237 237 l->head->b2 = j + k;
238 238 l->head++;
239 239 recurse(a, b, pos, i + k, a2, j + k, b2, l);
240 240 }
241 241
242 242 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
243 243 {
244 244 struct hunklist l;
245 245 struct pos *pos;
246 246 int t;
247 247
248 248 /* allocate and fill arrays */
249 249 t = equatelines(a, an, b, bn);
250 250 pos = (struct pos *)calloc(bn, sizeof(struct pos));
251 251 /* we can't have more matches than lines in the shorter file */
252 252 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
253 253 ((an<bn ? an:bn) + 1));
254 254
255 255 if (pos && l.base && t) {
256 256 /* generate the matching block list */
257 257 recurse(a, b, pos, 0, an, 0, bn, &l);
258 258 l.head->a1 = an;
259 259 l.head->b1 = bn;
260 260 l.head++;
261 261 }
262 262
263 263 free(pos);
264 264 return l;
265 265 }
266 266
267 267 static PyObject *blocks(PyObject *self, PyObject *args)
268 268 {
269 269 PyObject *sa, *sb, *rl = NULL, *m;
270 270 struct line *a, *b;
271 271 struct hunklist l = {NULL, NULL};
272 272 struct hunk *h;
273 273 int an, bn, pos = 0;
274 274
275 275 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
276 276 return NULL;
277 277
278 278 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
279 279 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
280 280 if (!a || !b)
281 281 goto nomem;
282 282
283 283 l = diff(a, an, b, bn);
284 284 rl = PyList_New(l.head - l.base);
285 285 if (!l.head || !rl)
286 286 goto nomem;
287 287
288 288 for (h = l.base; h != l.head; h++) {
289 289 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
290 290 PyList_SetItem(rl, pos, m);
291 291 pos++;
292 292 }
293 293
294 294 nomem:
295 295 free(a);
296 296 free(b);
297 297 free(l.base);
298 298 return rl ? rl : PyErr_NoMemory();
299 299 }
300 300
301 301 static PyObject *bdiff(PyObject *self, PyObject *args)
302 302 {
303 PyObject *sa, *sb, *result = NULL;
303 char *sa, *sb;
304 PyObject *result = NULL;
304 305 struct line *al, *bl;
305 306 struct hunklist l = {NULL, NULL};
306 307 struct hunk *h;
307 308 char encode[12], *rb;
308 int an, bn, len = 0, la = 0, lb = 0;
309 int an, bn, len = 0, la, lb;
309 310
310 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
311 if (!PyArg_ParseTuple(args, "t#t#:bdiff", &sa, &la, &sb, &lb))
311 312 return NULL;
312 313
313 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
314 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
314 an = splitlines(sa, la, &al);
315 bn = splitlines(sb, lb, &bl);
315 316 if (!al || !bl)
316 317 goto nomem;
317 318
318 319 l = diff(al, an, bl, bn);
319 320 if (!l.head)
320 321 goto nomem;
321 322
322 323 /* calculate length of output */
324 la = lb = 0;
323 325 for (h = l.base; h != l.head; h++) {
324 326 if (h->a1 != la || h->b1 != lb)
325 327 len += 12 + bl[h->b1].l - bl[lb].l;
326 328 la = h->a2;
327 329 lb = h->b2;
328 330 }
329 331
330 332 result = PyString_FromStringAndSize(NULL, len);
331 333 if (!result)
332 334 goto nomem;
333 335
334 336 /* build binary patch */
335 337 rb = PyString_AsString(result);
336 338 la = lb = 0;
337 339
338 340 for (h = l.base; h != l.head; h++) {
339 341 if (h->a1 != la || h->b1 != lb) {
340 342 len = bl[h->b1].l - bl[lb].l;
341 343 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
342 344 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
343 345 *(uint32_t *)(encode + 8) = htonl(len);
344 346 memcpy(rb, encode, 12);
345 347 memcpy(rb + 12, bl[lb].l, len);
346 348 rb += 12 + len;
347 349 }
348 350 la = h->a2;
349 351 lb = h->b2;
350 352 }
351 353
352 354 nomem:
353 355 free(al);
354 356 free(bl);
355 357 free(l.base);
356 358 return result ? result : PyErr_NoMemory();
357 359 }
358 360
359 361 static char mdiff_doc[] = "Efficient binary diff.";
360 362
361 363 static PyMethodDef methods[] = {
362 364 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
363 365 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
364 366 {NULL, NULL}
365 367 };
366 368
367 369 PyMODINIT_FUNC initbdiff(void)
368 370 {
369 371 Py_InitModule3("bdiff", methods, mdiff_doc);
370 372 }
371 373
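Before the revlog.py hunk, some context on the output format: the binary patch that bdiff() builds above is a concatenation of hunks, each a 12-byte header of three big-endian 32-bit values (start and end byte offsets into the old text, then the length of the replacement data) followed by that many replacement bytes. A small decoding sketch:

    import struct

    def iterhunks(patch):
        # walk the binary patch produced by bdiff.bdiff()
        i = 0
        while i < len(patch):
            start, end, length = struct.unpack(">III", patch[i:i + 12])
            yield start, end, patch[i + 12:i + 12 + length]
            i += 12 + length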
@@ -1,1255 +1,1255
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno ancestor mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s") % flag)
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
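Since the parent hashes are sorted before being fed to SHA-1, the resulting nodeid does not depend on parent order; for any text and any two 20-byte ids p1 and p2:

    assert hash(text, p1, p2) == hash(text, p2, p1)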
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
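A minimal round-trip sketch of the tagging scheme above: the first byte of a stored chunk distinguishes verbatim text marked 'u', text whose own first byte is NUL (stored with no marker), and zlib streams (zlib output happens to begin with 'x'):

    text = "x" * 100                  # long and compressible
    hdr, data = compress(text)        # -> ("", <zlib stream starting with 'x'>)
    assert decompress(hdr + data) == text

    hdr, data = compress("short")     # under 44 bytes: stored verbatim
    assert (hdr, data) == ('u', 'short')
    assert decompress(hdr + data) == 'short'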
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">i"
85 85
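Each RevlogNG entry is therefore 64 bytes, with the 64-bit first field packing the data offset and flag bits together. A decoding sketch (raw is a hypothetical 64-byte slice of an index file):

    import struct

    assert struct.calcsize(indexformatng) == 64
    offset_flags, clen, ulen, base, link, p1, p2, nodeid = \
        struct.unpack(indexformatng, raw)
    offset = offset_flags >> 16      # cf. ngoffset() below
    flags = offset_flags & 0xFFFF    # cf. ngtype() below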
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90 90
91 91 # lazyparser is not safe to use on windows if win32 extensions are not
92 92 # available. it keeps the file handle open, which makes it impossible
93 93 # to break hardlinks on local cloned repos.
94 94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 95 hasattr(util, 'win32api'))
96 96
97 97 def __init__(self, dataf, size, indexformat, shaoffset):
98 98 self.dataf = dataf
99 99 self.format = indexformat
100 100 self.s = struct.calcsize(indexformat)
101 101 self.indexformat = indexformat
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: -1}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109 self.shaoffset = shaoffset
110 110
111 111 def loadmap(self):
112 112 """
113 113 during a commit, we need to make sure the rev being added is
114 114 not a duplicate. This requires loading the entire index,
115 115 which is fairly slow. loadmap can load up just the node map,
116 116 which takes much less time.
117 117 """
118 118 if self.allmap: return
119 119 end = self.datasize
120 120 self.allmap = 1
121 121 cur = 0
122 122 count = 0
123 123 blocksize = self.s * 256
124 124 self.dataf.seek(0)
125 125 while cur < end:
126 126 data = self.dataf.read(blocksize)
127 127 off = 0
128 128 for x in xrange(256):
129 129 n = data[off + self.shaoffset:off + self.shaoffset + 20]
130 130 self.map[n] = count
131 131 count += 1
132 132 if count >= self.l:
133 133 break
134 134 off += self.s
135 135 cur += blocksize
136 136
137 137 def loadblock(self, blockstart, blocksize, data=None):
138 138 if self.all: return
139 139 if data is None:
140 140 self.dataf.seek(blockstart)
141 141 if blockstart + blocksize > self.datasize:
142 142 # the revlog may have grown since we've started running,
143 143 # but we don't have space in self.index for more entries.
144 144 # limit blocksize so that we don't get too much data.
145 145 blocksize = max(self.datasize - blockstart, 0)
146 146 data = self.dataf.read(blocksize)
147 147 lend = len(data) / self.s
148 148 i = blockstart / self.s
149 149 off = 0
150 150 for x in xrange(lend):
151 151 if self.index[i + x] == None:
152 152 b = data[off : off + self.s]
153 153 self.index[i + x] = b
154 154 n = b[self.shaoffset:self.shaoffset + 20]
155 155 self.map[n] = i + x
156 156 off += self.s
157 157
158 158 def findnode(self, node):
159 159 """search backwards through the index file for a specific node"""
160 160 if self.allmap: return None
161 161
162 162 # hg log will cause many many searches for the manifest
163 163 # nodes. After we get called a few times, just load the whole
164 164 # thing.
165 165 if self.mapfind_count > 8:
166 166 self.loadmap()
167 167 if node in self.map:
168 168 return node
169 169 return None
170 170 self.mapfind_count += 1
171 171 last = self.l - 1
172 172 while self.index[last] != None:
173 173 if last == 0:
174 174 self.all = 1
175 175 self.allmap = 1
176 176 return None
177 177 last -= 1
178 178 end = (last + 1) * self.s
179 179 blocksize = self.s * 256
180 180 while end >= 0:
181 181 start = max(end - blocksize, 0)
182 182 self.dataf.seek(start)
183 183 data = self.dataf.read(end - start)
184 184 findend = end - start
185 185 while True:
186 186 # we're searching backwards, so we have to make sure
187 187 # we don't find a changeset where this node is a parent
188 188 off = data.rfind(node, 0, findend)
189 189 findend = off
190 190 if off >= 0:
191 191 i = off / self.s
192 192 off = i * self.s
193 193 n = data[off + self.shaoffset:off + self.shaoffset + 20]
194 194 if n == node:
195 195 self.map[n] = i + start / self.s
196 196 return node
197 197 else:
198 198 break
199 199 end -= blocksize
200 200 return None
201 201
202 202 def loadindex(self, i=None, end=None):
203 203 if self.all: return
204 204 all = False
205 205 if i == None:
206 206 blockstart = 0
207 207 blocksize = (512 / self.s) * self.s
208 208 end = self.datasize
209 209 all = True
210 210 else:
211 211 if end:
212 212 blockstart = i * self.s
213 213 end = end * self.s
214 214 blocksize = end - blockstart
215 215 else:
216 216 blockstart = (i & ~(32)) * self.s
217 217 blocksize = self.s * 64
218 218 end = blockstart + blocksize
219 219 while blockstart < end:
220 220 self.loadblock(blockstart, blocksize)
221 221 blockstart += blocksize
222 222 if all: self.all = True
223 223
224 224 class lazyindex(object):
225 225 """a lazy version of the index array"""
226 226 def __init__(self, parser):
227 227 self.p = parser
228 228 def __len__(self):
229 229 return len(self.p.index)
230 230 def load(self, pos):
231 231 if pos < 0:
232 232 pos += len(self.p.index)
233 233 self.p.loadindex(pos)
234 234 return self.p.index[pos]
235 235 def __getitem__(self, pos):
236 236 ret = self.p.index[pos] or self.load(pos)
237 237 if isinstance(ret, str):
238 238 ret = struct.unpack(self.p.indexformat, ret)
239 239 return ret
240 240 def __setitem__(self, pos, item):
241 241 self.p.index[pos] = item
242 242 def __delitem__(self, pos):
243 243 del self.p.index[pos]
244 244 def append(self, e):
245 245 self.p.index.append(e)
246 246
247 247 class lazymap(object):
248 248 """a lazy version of the node map"""
249 249 def __init__(self, parser):
250 250 self.p = parser
251 251 def load(self, key):
252 252 n = self.p.findnode(key)
253 253 if n == None:
254 254 raise KeyError(key)
255 255 def __contains__(self, key):
256 256 if key in self.p.map:
257 257 return True
258 258 self.p.loadmap()
259 259 return key in self.p.map
260 260 def __iter__(self):
261 261 yield nullid
262 262 for i in xrange(self.p.l):
263 263 ret = self.p.index[i]
264 264 if not ret:
265 265 self.p.loadindex(i)
266 266 ret = self.p.index[i]
267 267 if isinstance(ret, str):
268 268 ret = struct.unpack(self.p.indexformat, ret)
269 269 yield ret[-1]
270 270 def __getitem__(self, key):
271 271 try:
272 272 return self.p.map[key]
273 273 except KeyError:
274 274 try:
275 275 self.load(key)
276 276 return self.p.map[key]
277 277 except KeyError:
278 278 raise KeyError("node " + hex(key))
279 279 def __setitem__(self, key, val):
280 280 self.p.map[key] = val
281 281 def __delitem__(self, key):
282 282 del self.p.map[key]
283 283
284 284 class RevlogError(Exception): pass
285 285
286 286 class revlog(object):
287 287 """
288 288 the underlying revision storage object
289 289
290 290 A revlog consists of two parts, an index and the revision data.
291 291
292 292 The index is a file with a fixed record size containing
293 293 information on each revision, includings its nodeid (hash), the
294 294 nodeids of its parents, the position and offset of its data within
295 295 the data file, and the revision it's based on. Finally, each entry
296 296 contains a linkrev entry that can serve as a pointer to external
297 297 data.
298 298
299 299 The revision data itself is a linear collection of data chunks.
300 300 Each chunk represents a revision and is usually represented as a
301 301 delta against the previous chunk. To bound lookup time, runs of
302 302 deltas are limited to about 2 times the length of the original
303 303 version data. This makes retrieval of a version proportional to
304 304 its size, or O(1) relative to the number of revisions.
305 305
306 306 Both pieces of the revlog are written to in an append-only
307 307 fashion, which means we never need to rewrite a file to insert or
308 308 remove data, and can use some simple techniques to avoid the need
309 309 for locking while reading.
310 310 """
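Concretely, the bounded delta runs mean any revision is one full text plus a short run of deltas; a sketch of the chain walk that revision() performs further below (rl is a hypothetical revlog instance, and chunk() already decompresses):

    def reconstruct(rl, rev):
        base = rl.base(rev)                 # first full version in the chain
        text = rl.chunk(base)
        deltas = [rl.chunk(r) for r in xrange(base + 1, rev + 1)]
        return mdiff.patches(text, deltas)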
311 311 def __init__(self, opener, indexfile, datafile,
312 312 defversion=REVLOG_DEFAULT_VERSION):
313 313 """
314 314 create a revlog object
315 315
316 316 opener is a function that abstracts the file opening operation
317 317 and can be used to implement COW semantics or the like.
318 318 """
319 319 self.indexfile = indexfile
320 320 self.datafile = datafile
321 321 self.opener = opener
322 322
323 323 self.indexstat = None
324 324 self.cache = None
325 325 self.chunkcache = None
326 326 self.defversion = defversion
327 327 self.load()
328 328
329 329 def load(self):
330 330 v = self.defversion
331 331 try:
332 332 f = self.opener(self.indexfile)
333 333 i = f.read(4)
334 334 f.seek(0)
335 335 except IOError, inst:
336 336 if inst.errno != errno.ENOENT:
337 337 raise
338 338 i = ""
339 339 else:
340 340 try:
341 341 st = util.fstat(f)
342 342 except AttributeError, inst:
343 343 st = None
344 344 else:
345 345 oldst = self.indexstat
346 346 if (oldst and st.st_dev == oldst.st_dev
347 347 and st.st_ino == oldst.st_ino
348 348 and st.st_mtime == oldst.st_mtime
349 349 and st.st_ctime == oldst.st_ctime):
350 350 return
351 351 self.indexstat = st
352 352 if len(i) > 0:
353 353 v = struct.unpack(versionformat, i)[0]
354 354 flags = v & ~0xFFFF
355 355 fmt = v & 0xFFFF
356 356 if fmt == REVLOGV0:
357 357 if flags:
358 358 raise RevlogError(_("index %s invalid flags %x for format v0") %
359 359 (self.indexfile, flags))
360 360 elif fmt == REVLOGNG:
361 361 if flags & ~REVLOGNGINLINEDATA:
362 362 raise RevlogError(_("index %s invalid flags %x for revlogng") %
363 363 (self.indexfile, flags))
364 364 else:
365 365 raise RevlogError(_("index %s invalid format %d") %
366 366 (self.indexfile, fmt))
367 367 self.version = v
368 368 if v == REVLOGV0:
369 369 self.indexformat = indexformatv0
370 370 shaoffset = v0shaoffset
371 371 else:
372 372 self.indexformat = indexformatng
373 373 shaoffset = ngshaoffset
374 374
375 375 if i:
376 376 if (lazyparser.safe_to_use and not self.inlinedata() and
377 377 st and st.st_size > 10000):
378 378 # big index, let's parse it on demand
379 379 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
380 380 self.index = lazyindex(parser)
381 381 self.nodemap = lazymap(parser)
382 382 else:
383 383 self.parseindex(f, st)
384 384 if self.version != REVLOGV0:
385 385 e = list(self.index[0])
386 386 type = self.ngtype(e[0])
387 387 e[0] = self.offset_type(0, type)
388 388 self.index[0] = e
389 389 else:
390 390 self.nodemap = { nullid: -1}
391 391 self.index = []
392 392
393 393
394 394 def parseindex(self, fp, st):
395 395 s = struct.calcsize(self.indexformat)
396 396 self.index = []
397 397 self.nodemap = {nullid: -1}
398 398 inline = self.inlinedata()
399 399 n = 0
400 400 leftover = None
401 401 while True:
402 402 if st:
403 403 data = fp.read(65536)
404 404 else:
405 405 # hack for httprangereader, it doesn't do partial reads well
406 406 data = fp.read()
407 407 if not data:
408 408 break
409 409 if n == 0 and self.inlinedata():
410 410 # cache the first chunk
411 411 self.chunkcache = (0, data)
412 412 if leftover:
413 413 data = leftover + data
414 414 leftover = None
415 415 off = 0
416 416 l = len(data)
417 417 while off < l:
418 418 if l - off < s:
419 419 leftover = data[off:]
420 420 break
421 421 cur = data[off:off + s]
422 422 off += s
423 423 e = struct.unpack(self.indexformat, cur)
424 424 self.index.append(e)
425 425 self.nodemap[e[-1]] = n
426 426 n += 1
427 427 if inline:
428 428 off += e[1]
429 429 if off > l:
430 430 # some things don't seek well, just read it
431 431 fp.read(off - l)
432 432 if not st:
433 433 break
434 434
435 435
436 436 def ngoffset(self, q):
437 437 if q & 0xFFFF:
438 438 raise RevlogError(_('%s: incompatible revision flag %x') %
439 439 (self.indexfile, q))
440 440 return long(q >> 16)
441 441
442 442 def ngtype(self, q):
443 443 return int(q & 0xFFFF)
444 444
445 445 def offset_type(self, offset, type):
446 446 return long(long(offset) << 16 | type)
447 447
448 448 def loadindex(self, start, end):
449 449 """load a block of indexes all at once from the lazy parser"""
450 450 if isinstance(self.index, lazyindex):
451 451 self.index.p.loadindex(start, end)
452 452
453 453 def loadindexmap(self):
454 454 """loads both the map and the index from the lazy parser"""
455 455 if isinstance(self.index, lazyindex):
456 456 p = self.index.p
457 457 p.loadindex()
458 458 self.nodemap = p.map
459 459
460 460 def loadmap(self):
461 461 """loads the map from the lazy parser"""
462 462 if isinstance(self.nodemap, lazymap):
463 463 self.nodemap.p.loadmap()
464 464 self.nodemap = self.nodemap.p.map
465 465
466 466 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
467 467 def tip(self): return self.node(len(self.index) - 1)
468 468 def count(self): return len(self.index)
469 469 def node(self, rev):
470 470 return (rev < 0) and nullid or self.index[rev][-1]
471 471 def rev(self, node):
472 472 try:
473 473 return self.nodemap[node]
474 474 except KeyError:
475 475 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
476 476 def linkrev(self, node):
477 477 return (node == nullid) and -1 or self.index[self.rev(node)][-4]
478 478 def parents(self, node):
479 479 if node == nullid: return (nullid, nullid)
480 480 r = self.rev(node)
481 481 d = self.index[r][-3:-1]
482 482 if self.version == REVLOGV0:
483 483 return d
484 484 return [ self.node(x) for x in d ]
485 485 def parentrevs(self, rev):
486 486 if rev == -1:
487 487 return (-1, -1)
488 488 d = self.index[rev][-3:-1]
489 489 if self.version == REVLOGV0:
490 490 return [ self.rev(x) for x in d ]
491 491 return d
492 492 def start(self, rev):
493 493 if rev < 0:
494 494 return -1
495 495 if self.version != REVLOGV0:
496 496 return self.ngoffset(self.index[rev][0])
497 497 return self.index[rev][0]
498 498
499 499 def end(self, rev): return self.start(rev) + self.length(rev)
500 500
501 501 def size(self, rev):
502 502 """return the length of the uncompressed text for a given revision"""
503 503 l = -1
504 504 if self.version != REVLOGV0:
505 505 l = self.index[rev][2]
506 506 if l >= 0:
507 507 return l
508 508
509 509 t = self.revision(self.node(rev))
510 510 return len(t)
511 511
512 512 # alternate implementation. The advantage of this code is that it
513 513 # will be faster for a single revision. But the results are not
514 514 # cached, so finding the size of every revision will be slower.
515 515 """
516 516 if self.cache and self.cache[1] == rev:
517 517 return len(self.cache[2])
518 518
519 519 base = self.base(rev)
520 520 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
521 521 base = self.cache[1]
522 522 text = self.cache[2]
523 523 else:
524 524 text = self.revision(self.node(base))
525 525
526 526 l = len(text)
527 527 for x in xrange(base + 1, rev + 1):
528 528 l = mdiff.patchedsize(l, self.chunk(x))
529 529 return l
530 530 """
531 531
532 532 def length(self, rev):
533 533 if rev < 0:
534 534 return 0
535 535 else:
536 536 return self.index[rev][1]
537 537 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
538 538
539 539 def reachable(self, rev, stop=None):
540 540 reachable = {}
541 541 visit = [rev]
542 542 reachable[rev] = 1
543 543 if stop:
544 544 stopn = self.rev(stop)
545 545 else:
546 546 stopn = 0
547 547 while visit:
548 548 n = visit.pop(0)
549 549 if n == stop:
550 550 continue
551 551 if n == nullid:
552 552 continue
553 553 for p in self.parents(n):
554 554 if self.rev(p) < stopn:
555 555 continue
556 556 if p not in reachable:
557 557 reachable[p] = 1
558 558 visit.append(p)
559 559 return reachable
560 560
561 561 def nodesbetween(self, roots=None, heads=None):
562 562 """Return a tuple containing three elements. Elements 1 and 2 contain
563 563 final lists of bases and heads after all the unreachable ones have been
564 564 pruned. Element 0 contains a topologically sorted list of all
565 565
566 566 nodes that satisfy these constraints:
567 567 1. All nodes must be descended from a node in roots (the nodes on
568 568 roots are considered descended from themselves).
569 569 2. All nodes must also be ancestors of a node in heads (the nodes in
570 570 heads are considered to be their own ancestors).
571 571
572 572 If roots is unspecified, nullid is assumed as the only root.
573 573 If heads is unspecified, it is taken to be the output of the
574 574 heads method (i.e. a list of all nodes in the repository that
575 575 have no children)."""
576 576 nonodes = ([], [], [])
577 577 if roots is not None:
578 578 roots = list(roots)
579 579 if not roots:
580 580 return nonodes
581 581 lowestrev = min([self.rev(n) for n in roots])
582 582 else:
583 583 roots = [nullid] # Everybody's a descendent of nullid
584 584 lowestrev = -1
585 585 if (lowestrev == -1) and (heads is None):
586 586 # We want _all_ the nodes!
587 587 return ([self.node(r) for r in xrange(0, self.count())],
588 588 [nullid], list(self.heads()))
589 589 if heads is None:
590 590 # All nodes are ancestors, so the latest ancestor is the last
591 591 # node.
592 592 highestrev = self.count() - 1
593 593 # Set ancestors to None to signal that every node is an ancestor.
594 594 ancestors = None
595 595 # Set heads to an empty dictionary for later discovery of heads
596 596 heads = {}
597 597 else:
598 598 heads = list(heads)
599 599 if not heads:
600 600 return nonodes
601 601 ancestors = {}
602 602 # Start at the top and keep marking parents until we're done.
603 603 nodestotag = heads[:]
604 604 # Turn heads into a dictionary so we can remove 'fake' heads.
605 605 # Also, later we will be using it to filter out the heads we can't
606 606 # find from roots.
607 607 heads = dict.fromkeys(heads, 0)
608 608 # Remember where the top was so we can use it as a limit later.
609 609 highestrev = max([self.rev(n) for n in nodestotag])
610 610 while nodestotag:
611 611 # grab a node to tag
612 612 n = nodestotag.pop()
613 613 # Never tag nullid
614 614 if n == nullid:
615 615 continue
616 616 # A node's revision number represents its place in a
617 617 # topologically sorted list of nodes.
618 618 r = self.rev(n)
619 619 if r >= lowestrev:
620 620 if n not in ancestors:
621 621 # If we are possibly a descendent of one of the roots
622 622 # and we haven't already been marked as an ancestor
623 623 ancestors[n] = 1 # Mark as ancestor
624 624 # Add non-nullid parents to list of nodes to tag.
625 625 nodestotag.extend([p for p in self.parents(n) if
626 626 p != nullid])
627 627 elif n in heads: # We've seen it before, is it a fake head?
628 628 # So it is, real heads should not be the ancestors of
629 629 # any other heads.
630 630 heads.pop(n)
631 631 if not ancestors:
632 632 return nonodes
633 633 # Now that we have our set of ancestors, we want to remove any
634 634 # roots that are not ancestors.
635 635
636 636 # If one of the roots was nullid, everything is included anyway.
637 637 if lowestrev > -1:
638 638 # But, since we weren't, let's recompute the lowest rev to not
639 639 # include roots that aren't ancestors.
640 640
641 641 # Filter out roots that aren't ancestors of heads
642 642 roots = [n for n in roots if n in ancestors]
643 643 # Recompute the lowest revision
644 644 if roots:
645 645 lowestrev = min([self.rev(n) for n in roots])
646 646 else:
647 647 # No more roots? Return empty list
648 648 return nonodes
649 649 else:
650 650 # We are descending from nullid, and don't need to care about
651 651 # any other roots.
652 652 lowestrev = -1
653 653 roots = [nullid]
654 654 # Transform our roots list into a 'set' (i.e. a dictionary where the
655 655 # values don't matter).
656 656 descendents = dict.fromkeys(roots, 1)
657 657 # Also, keep the original roots so we can filter out roots that aren't
658 658 # 'real' roots (i.e. are descended from other roots).
659 659 roots = descendents.copy()
660 660 # Our topologically sorted list of output nodes.
661 661 orderedout = []
662 662 # Don't start at nullid since we don't want nullid in our output list,
663 663 # and if nullid shows up in descendents, empty parents will look like
664 664 # they're descendents.
665 665 for r in xrange(max(lowestrev, 0), highestrev + 1):
666 666 n = self.node(r)
667 667 isdescendent = False
668 668 if lowestrev == -1: # Everybody is a descendent of nullid
669 669 isdescendent = True
670 670 elif n in descendents:
671 671 # n is already a descendent
672 672 isdescendent = True
673 673 # This check only needs to be done here because all the roots
674 674 # will start being marked as descendents before the loop.
675 675 if n in roots:
676 676 # If n was a root, check if it's a 'real' root.
677 677 p = tuple(self.parents(n))
678 678 # If any of its parents are descendents, it's not a root.
679 679 if (p[0] in descendents) or (p[1] in descendents):
680 680 roots.pop(n)
681 681 else:
682 682 p = tuple(self.parents(n))
683 683 # A node is a descendent if either of its parents are
684 684 # descendents. (We seeded the descendents list with the roots
685 685 # up there, remember?)
686 686 if (p[0] in descendents) or (p[1] in descendents):
687 687 descendents[n] = 1
688 688 isdescendent = True
689 689 if isdescendent and ((ancestors is None) or (n in ancestors)):
690 690 # Only include nodes that are both descendents and ancestors.
691 691 orderedout.append(n)
692 692 if (ancestors is not None) and (n in heads):
693 693 # We're trying to figure out which heads are reachable
694 694 # from roots.
695 695 # Mark this head as having been reached
696 696 heads[n] = 1
697 697 elif ancestors is None:
698 698 # Otherwise, we're trying to discover the heads.
699 699 # Assume this is a head because if it isn't, the next step
700 700 # will eventually remove it.
701 701 heads[n] = 1
702 702 # But, obviously its parents aren't.
703 703 for p in self.parents(n):
704 704 heads.pop(p, None)
705 705 heads = [n for n in heads.iterkeys() if heads[n] != 0]
706 706 roots = roots.keys()
707 707 assert orderedout
708 708 assert roots
709 709 assert heads
710 710 return (orderedout, roots, heads)
711 711
712 712 def heads(self, start=None):
713 713 """return the list of all nodes that have no children
714 714
715 715 if start is specified, only heads that are descendants of
716 716 start will be returned
717 717
718 718 """
719 719 if start is None:
720 720 start = nullid
721 721 startrev = self.rev(start)
722 722 reachable = {startrev: 1}
723 723 heads = {startrev: 1}
724 724
725 725 parentrevs = self.parentrevs
726 726 for r in xrange(startrev + 1, self.count()):
727 727 for p in parentrevs(r):
728 728 if p in reachable:
729 729 reachable[r] = 1
730 730 heads[r] = 1
731 731 if p in heads:
732 732 del heads[p]
733 733 return [self.node(r) for r in heads]
734 734
735 735 def children(self, node):
736 736 """find the children of a given node"""
737 737 c = []
738 738 p = self.rev(node)
739 739 for r in range(p + 1, self.count()):
740 740 n = self.node(r)
741 741 for pn in self.parents(n):
742 742 if pn == node:
743 743 c.append(n)
744 744 continue
745 745 elif pn == nullid:
746 746 continue
747 747 return c
748 748
749 749 def lookup(self, id):
750 750 """locate a node based on:
751 751 - revision number or str(revision number)
752 752 - nodeid or subset of hex nodeid
753 753 """
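For example, each of these forms resolves to the same node (rl a hypothetical revlog instance holding a revision 0 whose nodeid starts with 9061613c):

    rl.lookup(0)            # revision number
    rl.lookup("0")          # str(revision number)
    rl.lookup("9061613c")   # unambiguous hex nodeid prefix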
754 754 if isinstance(id, (long, int)):
755 755 # rev
756 756 return self.node(id)
757 757 try:
758 758 # str(rev)
759 759 rev = int(id)
760 760 if str(rev) != id: raise ValueError
761 761 if rev < 0: rev = self.count() + rev
762 762 if rev < 0 or rev >= self.count(): raise ValueError
763 763 return self.node(rev)
764 764 except (ValueError, OverflowError):
765 765 pass
766 766 try:
767 767 # hex(node)[:...]
768 768 if len(id) % 2 == 0:
769 769 bin_id = bin(id)
770 770 else:
771 771 bin_id = bin(id[:-1])
772 772 node = None
773 773 for n in self.nodemap:
774 774 if n.startswith(bin_id) and hex(n).startswith(id):
775 775 if node is not None:
776 776 raise RevlogError(_("Ambiguous identifier"))
777 777 node = n
778 778 if node is not None:
779 779 return node
780 780 except TypeError:
781 781 pass
782 782
783 783 # might need fixing if we change hash lengths
784 784 if len(id) == 20 and id in self.nodemap:
785 785 # node
786 786 return id
787 787
788 788 raise RevlogError(_("No match found"))
789 789
790 790 def cmp(self, node, text):
791 791 """compare text with a given file revision"""
792 792 p1, p2 = self.parents(node)
793 793 return hash(text, p1, p2) != node
794 794
795 795 def makenode(self, node, text):
796 796 """calculate a file nodeid for text, descended or possibly
797 797 unchanged from node"""
798 798
799 799 if self.cmp(node, text):
800 800 return hash(text, node, nullid)
801 801 return node
802 802
803 803 def diff(self, a, b):
804 804 """return a delta between two revisions"""
805 805 return mdiff.textdiff(a, b)
806 806
807 807 def patches(self, t, pl):
808 808 """apply a list of patches to a string"""
809 809 return mdiff.patches(t, pl)
810 810
811 811 def chunk(self, rev, df=None, cachelen=4096):
812 812 start, length = self.start(rev), self.length(rev)
813 813 inline = self.inlinedata()
814 814 if inline:
815 815 start += (rev + 1) * struct.calcsize(self.indexformat)
816 816 end = start + length
817 817 def loadcache(df):
818 818 cache_length = max(cachelen, length) # 4k
819 819 if not df:
820 820 if inline:
821 821 df = self.opener(self.indexfile)
822 822 else:
823 823 df = self.opener(self.datafile)
824 824 df.seek(start)
825 825 self.chunkcache = (start, df.read(cache_length))
826 826
827 827 if not self.chunkcache:
828 828 loadcache(df)
829 829
830 830 cache_start = self.chunkcache[0]
831 831 cache_end = cache_start + len(self.chunkcache[1])
832 832 if start >= cache_start and end <= cache_end:
833 833 # it is cached
834 834 offset = start - cache_start
835 835 else:
836 836 loadcache(df)
837 837 offset = 0
838 838
839 839 #def checkchunk():
840 840 # df = self.opener(self.datafile)
841 841 # df.seek(start)
842 842 # return df.read(length)
843 843 #assert s == checkchunk()
844 844 return decompress(self.chunkcache[1][offset:offset + length])
845 845
846 846 def delta(self, node):
847 847 """return or calculate a delta between a node and its predecessor"""
848 848 r = self.rev(node)
849 849 return self.revdiff(r - 1, r)
850 850
851 851 def revdiff(self, rev1, rev2):
852 852 """return or calculate a delta between two revisions"""
853 853 b1 = self.base(rev1)
854 854 b2 = self.base(rev2)
855 855 if b1 == b2 and rev1 + 1 == rev2:
856 856 return self.chunk(rev2)
857 857 else:
858 858 return self.diff(self.revision(self.node(rev1)),
859 859 self.revision(self.node(rev2)))
860 860
861 861 def revision(self, node):
862 862 """return an uncompressed revision of a given"""
863 863 if node == nullid: return ""
864 864 if self.cache and self.cache[0] == node: return self.cache[2]
865 865
866 866 # look up what we need to read
867 867 text = None
868 868 rev = self.rev(node)
869 869 base = self.base(rev)
870 870
871 871 if self.inlinedata():
872 872 # we probably have the whole chunk cached
873 873 df = None
874 874 else:
875 875 df = self.opener(self.datafile)
876 876
877 877 # do we have useful data cached?
878 878 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
879 879 base = self.cache[1]
880 880 text = self.cache[2]
881 881 self.loadindex(base, rev + 1)
882 882 else:
883 883 self.loadindex(base, rev + 1)
884 884 text = self.chunk(base, df=df)
885 885
886 886 bins = []
887 887 for r in xrange(base + 1, rev + 1):
888 888 bins.append(self.chunk(r, df=df))
889 889
890 890 text = self.patches(text, bins)
891 891
892 892 p1, p2 = self.parents(node)
893 893 if node != hash(text, p1, p2):
894 894 raise RevlogError(_("integrity check failed on %s:%d")
895 895 % (self.datafile, rev))
896 896
897 897 self.cache = (node, rev, text)
898 898 return text
899 899
900 900 def checkinlinesize(self, tr, fp=None):
901 901 if not self.inlinedata():
902 902 return
903 903 if not fp:
904 904 fp = self.opener(self.indexfile, 'r')
905 905 fp.seek(0, 2)
906 906 size = fp.tell()
907 907 if size < 131072:
908 908 return
909 909 trinfo = tr.find(self.indexfile)
910 910 if trinfo == None:
911 911 raise RevlogError(_("%s not found in the transaction") %
912 912 self.indexfile)
913 913
914 914 trindex = trinfo[2]
915 915 dataoff = self.start(trindex)
916 916
917 917 tr.add(self.datafile, dataoff)
918 918 df = self.opener(self.datafile, 'w')
919 919 calc = struct.calcsize(self.indexformat)
920 920 for r in xrange(self.count()):
921 921 start = self.start(r) + (r + 1) * calc
922 922 length = self.length(r)
923 923 fp.seek(start)
924 924 d = fp.read(length)
925 925 df.write(d)
926 926 fp.close()
927 927 df.close()
928 928 fp = self.opener(self.indexfile, 'w', atomictemp=True)
929 929 self.version &= ~(REVLOGNGINLINEDATA)
930 930 if self.count():
931 931 x = self.index[0]
932 932 e = struct.pack(self.indexformat, *x)[4:]
933 933 l = struct.pack(versionformat, self.version)
934 934 fp.write(l)
935 935 fp.write(e)
936 936
937 937 for i in xrange(1, self.count()):
938 938 x = self.index[i]
939 939 e = struct.pack(self.indexformat, *x)
940 940 fp.write(e)
941 941
942 942 # if we don't call rename, the temp file will never replace the
943 943 # real index
944 944 fp.rename()
945 945
946 946 tr.replace(self.indexfile, trindex * calc)
947 947 self.chunkcache = None
948 948
949 949 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
950 950 """add a revision to the log
951 951
952 952 text - the revision data to add
953 953 transaction - the transaction object used for rollback
954 954 link - the linkrev data to add
955 955 p1, p2 - the parent nodeids of the revision
956 956 d - an optional precomputed delta
957 957 """
958 958 if text is None: text = ""
959 959 if p1 is None: p1 = self.tip()
960 960 if p2 is None: p2 = nullid
961 961
962 962 node = hash(text, p1, p2)
963 963
964 964 if node in self.nodemap:
965 965 return node
966 966
967 967 n = self.count()
968 968 t = n - 1
969 969
970 970 if n:
971 971 base = self.base(t)
972 972 start = self.start(base)
973 973 end = self.end(t)
974 974 if not d:
975 975 prev = self.revision(self.tip())
976 d = self.diff(prev, str(text))
976 d = self.diff(prev, text)
977 977 data = compress(d)
978 978 l = len(data[1]) + len(data[0])
979 979 dist = end - start + l
980 980
981 981 # full versions are inserted when the needed deltas
982 982 # become comparable to the uncompressed text
983 983 if not n or dist > len(text) * 2:
984 984 data = compress(text)
985 985 l = len(data[1]) + len(data[0])
986 986 base = n
987 987 else:
988 988 base = self.base(t)
989 989
990 990 offset = 0
991 991 if t >= 0:
992 992 offset = self.end(t)
993 993
994 994 if self.version == REVLOGV0:
995 995 e = (offset, l, base, link, p1, p2, node)
996 996 else:
997 997 e = (self.offset_type(offset, 0), l, len(text),
998 998 base, link, self.rev(p1), self.rev(p2), node)
999 999
1000 1000 self.index.append(e)
1001 1001 self.nodemap[node] = n
1002 1002 entry = struct.pack(self.indexformat, *e)
1003 1003
1004 1004 if not self.inlinedata():
1005 1005 transaction.add(self.datafile, offset)
1006 1006 transaction.add(self.indexfile, n * len(entry))
1007 1007 f = self.opener(self.datafile, "a")
1008 1008 if data[0]:
1009 1009 f.write(data[0])
1010 1010 f.write(data[1])
1011 1011 f.close()
1012 1012 f = self.opener(self.indexfile, "a")
1013 1013 else:
1014 1014 f = self.opener(self.indexfile, "a+")
1015 1015 f.seek(0, 2)
1016 1016 transaction.add(self.indexfile, f.tell(), self.count() - 1)
1017 1017
1018 1018 if len(self.index) == 1 and self.version != REVLOGV0:
1019 1019 l = struct.pack(versionformat, self.version)
1020 1020 f.write(l)
1021 1021 entry = entry[4:]
1022 1022
1023 1023 f.write(entry)
1024 1024
1025 1025 if self.inlinedata():
1026 1026 f.write(data[0])
1027 1027 f.write(data[1])
1028 1028 self.checkinlinesize(transaction, f)
1029 1029
1030 1030 self.cache = (node, n, text)
1031 1031 return node
1032 1032
1033 1033 def ancestor(self, a, b):
1034 1034 """calculate the least common ancestor of nodes a and b"""
1035 1035
1036 1036 def parents(rev):
1037 1037 return [p for p in self.parentrevs(rev) if p != -1]
1038 1038
1039 1039 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1040 1040 if c is None:
1041 1041 return nullid
1042 1042
1043 1043 return self.node(c)
1044 1044
1045 1045 def group(self, nodelist, lookup, infocollect=None):
1046 1046 """calculate a delta group
1047 1047
1048 1048 Given a list of changeset revs, return a set of deltas and
1049 1049 metadata corresponding to nodes. The first delta is
1050 1050 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1051 1051 have this parent as it has all history before these
1052 1052 changesets. The parent is parent[0].
1053 1053 """
1054 1054 revs = [self.rev(n) for n in nodelist]
1055 1055
1056 1056 # if we don't have any revisions touched by these changesets, bail
1057 1057 if not revs:
1058 1058 yield changegroup.closechunk()
1059 1059 return
1060 1060
1061 1061 # add the parent of the first rev
1062 1062 p = self.parents(self.node(revs[0]))[0]
1063 1063 revs.insert(0, self.rev(p))
1064 1064
1065 1065 # build deltas
1066 1066 for d in xrange(0, len(revs) - 1):
1067 1067 a, b = revs[d], revs[d + 1]
1068 1068 nb = self.node(b)
1069 1069
1070 1070 if infocollect is not None:
1071 1071 infocollect(nb)
1072 1072
1073 1073 d = self.revdiff(a, b)
1074 1074 p = self.parents(nb)
1075 1075 meta = nb + p[0] + p[1] + lookup(nb)
1076 1076 yield changegroup.genchunk("%s%s" % (meta, d))
1077 1077
1078 1078 yield changegroup.closechunk()
1079 1079
1080 1080 def addgroup(self, revs, linkmapper, transaction, unique=0):
1081 1081 """
1082 1082 add a delta group
1083 1083
1084 1084 given a set of deltas, add them to the revision log. the
1085 1085 first delta is against its parent, which should be in our
1086 1086 log, the rest are against the previous delta.
1087 1087 """
1088 1088
1089 1089 #track the base of the current delta log
1090 1090 r = self.count()
1091 1091 t = r - 1
1092 1092 node = None
1093 1093
1094 1094 base = prev = -1
1095 1095 start = end = textlen = 0
1096 1096 if r:
1097 1097 end = self.end(t)
1098 1098
1099 1099 ifh = self.opener(self.indexfile, "a+")
1100 1100 ifh.seek(0, 2)
1101 1101 transaction.add(self.indexfile, ifh.tell(), self.count())
1102 1102 if self.inlinedata():
1103 1103 dfh = None
1104 1104 else:
1105 1105 transaction.add(self.datafile, end)
1106 1106 dfh = self.opener(self.datafile, "a")
1107 1107
1108 1108 # loop through our set of deltas
1109 1109 chain = None
1110 1110 for chunk in revs:
1111 1111 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1112 1112 link = linkmapper(cs)
1113 1113 if node in self.nodemap:
1114 1114 # this can happen if two branches make the same change
1115 1115 # if unique:
1116 1116 # raise RevlogError(_("already have %s") % hex(node[:4]))
1117 1117 chain = node
1118 1118 continue
1119 1119 delta = chunk[80:]
1120 1120
1121 1121 for p in (p1, p2):
1122 1122 if not p in self.nodemap:
1123 1123 raise RevlogError(_("unknown parent %s") % short(p))
1124 1124
1125 1125 if not chain:
1126 1126 # retrieve the parent revision of the delta chain
1127 1127 chain = p1
1128 1128 if not chain in self.nodemap:
1129 1129 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1130 1130
1131 1131 # full versions are inserted when the needed deltas become
1132 1132 # comparable to the uncompressed text or when the previous
1133 1133 # version is not the one we have a delta against. We use
1134 1134 # the size of the previous full rev as a proxy for the
1135 1135 # current size.
1136 1136
1137 1137 if chain == prev:
1138 1138 tempd = compress(delta)
1139 1139 cdelta = tempd[0] + tempd[1]
1140 1140 textlen = mdiff.patchedsize(textlen, delta)
1141 1141
1142 1142 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1143 1143 # flush our writes here so we can read it in revision
1144 1144 if dfh:
1145 1145 dfh.flush()
1146 1146 ifh.flush()
1147 1147 text = self.revision(chain)
1148 1148 text = self.patches(text, [delta])
1149 1149 chk = self.addrevision(text, transaction, link, p1, p2)
1150 1150 if chk != node:
1151 1151 raise RevlogError(_("consistency error adding group"))
1152 1152 textlen = len(text)
1153 1153 else:
1154 1154 if self.version == REVLOGV0:
1155 1155 e = (end, len(cdelta), base, link, p1, p2, node)
1156 1156 else:
1157 1157 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1158 1158 link, self.rev(p1), self.rev(p2), node)
1159 1159 self.index.append(e)
1160 1160 self.nodemap[node] = r
1161 1161 if self.inlinedata():
1162 1162 ifh.write(struct.pack(self.indexformat, *e))
1163 1163 ifh.write(cdelta)
1164 1164 self.checkinlinesize(transaction, ifh)
1165 1165 if not self.inlinedata():
1166 1166 dfh = self.opener(self.datafile, "a")
1167 1167 ifh = self.opener(self.indexfile, "a")
1168 1168 else:
1169 1169 if not dfh:
1170 1170 # addrevision switched from inline to conventional
1171 1171 # reopen the index
1172 1172 dfh = self.opener(self.datafile, "a")
1173 1173 ifh = self.opener(self.indexfile, "a")
1174 1174 dfh.write(cdelta)
1175 1175 ifh.write(struct.pack(self.indexformat, *e))
1176 1176
1177 1177 t, r, chain, prev = r, r + 1, node, node
1178 1178 base = self.base(t)
1179 1179 start = self.start(base)
1180 1180 end = self.end(t)
1181 1181
1182 1182 return node
1183 1183
1184 1184 def strip(self, rev, minlink):
1185 1185 if self.count() == 0 or rev >= self.count():
1186 1186 return
1187 1187
1188 1188 if isinstance(self.index, lazyindex):
1189 1189 self.loadindexmap()
1190 1190
1191 1191 # When stripping away a revision, we need to make sure it
1192 1192 # does not actually belong to an older changeset.
1193 1193 # The minlink parameter defines the oldest revision
1194 1194 # we're allowed to strip away.
1195 1195 while minlink > self.index[rev][-4]:
1196 1196 rev += 1
1197 1197 if rev >= self.count():
1198 1198 return
1199 1199
1200 1200 # first truncate the files on disk
1201 1201 end = self.start(rev)
1202 1202 if not self.inlinedata():
1203 1203 df = self.opener(self.datafile, "a")
1204 1204 df.truncate(end)
1205 1205 end = rev * struct.calcsize(self.indexformat)
1206 1206 else:
1207 1207 end += rev * struct.calcsize(self.indexformat)
1208 1208
1209 1209 indexf = self.opener(self.indexfile, "a")
1210 1210 indexf.truncate(end)
1211 1211
1212 1212 # then reset internal state in memory to forget those revisions
1213 1213 self.cache = None
1214 1214 self.chunkcache = None
1215 1215 for x in xrange(rev, self.count()):
1216 1216 del self.nodemap[self.node(x)]
1217 1217
1218 1218 del self.index[rev:]
1219 1219
1220 1220 def checksize(self):
1221 1221 expected = 0
1222 1222 if self.count():
1223 1223 expected = self.end(self.count() - 1)
1224 1224
1225 1225 try:
1226 1226 f = self.opener(self.datafile)
1227 1227 f.seek(0, 2)
1228 1228 actual = f.tell()
1229 1229 dd = actual - expected
1230 1230 except IOError, inst:
1231 1231 if inst.errno != errno.ENOENT:
1232 1232 raise
1233 1233 dd = 0
1234 1234
1235 1235 try:
1236 1236 f = self.opener(self.indexfile)
1237 1237 f.seek(0, 2)
1238 1238 actual = f.tell()
1239 1239 s = struct.calcsize(self.indexformat)
1240 1240 i = actual / s
1241 1241 di = actual - (i * s)
1242 1242 if self.inlinedata():
1243 1243 databytes = 0
1244 1244 for r in xrange(self.count()):
1245 1245 databytes += self.length(r)
1246 1246 dd = 0
1247 1247 di = actual - self.count() * s - databytes
1248 1248 except IOError, inst:
1249 1249 if inst.errno != errno.ENOENT:
1250 1250 raise
1251 1251 di = 0
1252 1252
1253 1253 return (dd, di)
1254 1254
1255 1255