bundle: move chunk parsing into unbundle class
Matt Mackall
r12336:9d234f7d default
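This changeset adds a parsechunk() method to the unbundle10 class in changegroup.py and switches revlog.addgroup() over to it, so the 80-byte chunk header (node, p1, p2, cs) is decoded in one place instead of by every consumer of the bundle stream. Below is a minimal sketch of the consumer-side difference, assuming 'bundle' is an unbundle10 object such as the one returned by changegroup.readbundle(); the apply_deltas_* helpers are hypothetical and only illustrate the calling patterns shown in the diff.

import struct

def apply_deltas_old(bundle):
    """Old pattern: every caller unpacked the 80-byte header itself."""
    while True:
        chunk = bundle.chunk()
        if not chunk:
            break
        node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
        delta = buffer(chunk, 80)
        # ... hand (node, p1, p2, cs, delta) to the revlog ...

def apply_deltas_new(bundle):
    """New pattern: parsechunk() returns a dict, empty at the end of a group."""
    while True:
        chunkdata = bundle.parsechunk()
        if not chunkdata:
            break
        node = chunkdata['node']
        p1 = chunkdata['p1']
        p2 = chunkdata['p2']
        cs = chunkdata['cs']
        delta = chunkdata['data']
        # ... hand (node, p1, p2, cs, delta) to the revlog ...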
@@ -1,193 +1,202 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import util
10 10 import struct, os, bz2, zlib, tempfile
11 11
12 12 def getchunk(source):
13 13 """return the next chunk from changegroup 'source' as a string"""
14 14 d = source.read(4)
15 15 if not d:
16 16 return ""
17 17 l = struct.unpack(">l", d)[0]
18 18 if l <= 4:
19 19 return ""
20 20 d = source.read(l - 4)
21 21 if len(d) < l - 4:
22 22 raise util.Abort(_("premature EOF reading chunk"
23 23 " (got %d bytes, expected %d)")
24 24 % (len(d), l - 4))
25 25 return d
26 26
27 27 def chunkheader(length):
28 28 """return a changegroup chunk header (string)"""
29 29 return struct.pack(">l", length + 4)
30 30
31 31 def closechunk():
32 32 """return a changegroup chunk header (string) for a zero-length chunk"""
33 33 return struct.pack(">l", 0)
34 34
35 35 class nocompress(object):
36 36 def compress(self, x):
37 37 return x
38 38 def flush(self):
39 39 return ""
40 40
41 41 bundletypes = {
42 42 "": ("", nocompress),
43 43 "HG10UN": ("HG10UN", nocompress),
44 44 "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
45 45 "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
46 46 }
47 47
48 48 def collector(cl, mmfs, files):
49 49 # Gather information about changeset nodes going out in a bundle.
50 50 # We want to gather manifests needed and filelogs affected.
51 51 def collect(node):
52 52 c = cl.read(node)
53 53 files.update(c[3])
54 54 mmfs.setdefault(c[0], node)
55 55 return collect
56 56
57 57 # hgweb uses this list to communicate its preferred type
58 58 bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
59 59
60 60 def writebundle(cg, filename, bundletype):
61 61 """Write a bundle file and return its filename.
62 62
63 63 Existing files will not be overwritten.
64 64 If no filename is specified, a temporary file is created.
65 65 bz2 compression can be turned off.
66 66 The bundle file will be deleted in case of errors.
67 67 """
68 68
69 69 fh = None
70 70 cleanup = None
71 71 try:
72 72 if filename:
73 73 fh = open(filename, "wb")
74 74 else:
75 75 fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
76 76 fh = os.fdopen(fd, "wb")
77 77 cleanup = filename
78 78
79 79 header, compressor = bundletypes[bundletype]
80 80 fh.write(header)
81 81 z = compressor()
82 82
83 83 # parse the changegroup data, otherwise we will block
84 84 # in case of sshrepo because we don't know the end of the stream
85 85
86 86 # an empty chunkgroup is the end of the changegroup
87 87 # a changegroup has at least 2 chunkgroups (changelog and manifest).
88 88 # after that, an empty chunkgroup is the end of the changegroup
89 89 empty = False
90 90 count = 0
91 91 while not empty or count <= 2:
92 92 empty = True
93 93 count += 1
94 94 while 1:
95 95 chunk = getchunk(cg)
96 96 if not chunk:
97 97 break
98 98 empty = False
99 99 fh.write(z.compress(chunkheader(len(chunk))))
100 100 pos = 0
101 101 while pos < len(chunk):
102 102 next = pos + 2**20
103 103 fh.write(z.compress(chunk[pos:next]))
104 104 pos = next
105 105 fh.write(z.compress(closechunk()))
106 106 fh.write(z.flush())
107 107 cleanup = None
108 108 return filename
109 109 finally:
110 110 if fh is not None:
111 111 fh.close()
112 112 if cleanup is not None:
113 113 os.unlink(cleanup)
114 114
115 115 def decompressor(fh, alg):
116 116 if alg == 'UN':
117 117 return fh
118 118 elif alg == 'GZ':
119 119 def generator(f):
120 120 zd = zlib.decompressobj()
121 121 for chunk in f:
122 122 yield zd.decompress(chunk)
123 123 elif alg == 'BZ':
124 124 def generator(f):
125 125 zd = bz2.BZ2Decompressor()
126 126 zd.decompress("BZ")
127 127 for chunk in util.filechunkiter(f, 4096):
128 128 yield zd.decompress(chunk)
129 129 else:
130 130 raise util.Abort("unknown bundle compression '%s'" % alg)
131 131 return util.chunkbuffer(generator(fh))
132 132
133 133 class unbundle10(object):
134 134 def __init__(self, fh, alg):
135 135 self._stream = decompressor(fh, alg)
136 136 self._type = alg
137 137 self.callback = None
138 138 def compressed(self):
139 139 return self._type != 'UN'
140 140 def read(self, l):
141 141 return self._stream.read(l)
142 142 def seek(self, pos):
143 143 return self._stream.seek(pos)
144 144 def tell(self):
145 145 return self._stream.tell()
146 146
147 147 def chunklength(self):
148 148 d = self.read(4)
149 149 if not d:
150 150 return 0
151 151 l = max(0, struct.unpack(">l", d)[0] - 4)
152 152 if l and self.callback:
153 153 self.callback()
154 154 return l
155 155
156 156 def chunk(self):
157 157 """return the next chunk from changegroup 'source' as a string"""
158 158 l = self.chunklength()
159 159 d = self.read(l)
160 160 if len(d) < l:
161 161 raise util.Abort(_("premature EOF reading chunk"
162 162 " (got %d bytes, expected %d)")
163 163 % (len(d), l))
164 164 return d
165 165
166 def parsechunk(self):
167 l = self.chunklength()
168 if not l:
169 return {}
170 h = self.read(80)
171 node, p1, p2, cs = struct.unpack("20s20s20s20s", h)
172 data = self.read(l - 80)
173 return dict(node=node, p1=p1, p2=p2, cs=cs, data=data)
174
166 175 class headerlessfixup(object):
167 176 def __init__(self, fh, h):
168 177 self._h = h
169 178 self._fh = fh
170 179 def read(self, n):
171 180 if self._h:
172 181 d, self._h = self._h[:n], self._h[n:]
173 182 if len(d) < n:
174 183 d += self._fh.read(n - len(d))
175 184 return d
176 185 return self._fh.read(n)
177 186
178 187 def readbundle(fh, fname):
179 188 header = fh.read(6)
180 189
181 190 if not fname:
182 191 fname = "stream"
183 192 if not header.startswith('HG') and header.startswith('\0'):
184 193 fh = headerlessfixup(fh, header)
185 194 header = "HG10UN"
186 195
187 196 magic, version, alg = header[0:2], header[2:4], header[4:6]
188 197
189 198 if magic != 'HG':
190 199 raise util.Abort(_('%s: not a Mercurial bundle') % fname)
191 200 if version != '10':
192 201 raise util.Abort(_('%s: unknown bundle version %s') % (fname, version))
193 202 return unbundle10(fh, alg)
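readbundle() above sniffs the 6-byte bundle header (magic 'HG', version '10', compression 'UN', 'GZ' or 'BZ'), falls back to headerlessfixup for raw streams that start with '\0', and returns an unbundle10 wrapping the decompressed data. A hedged sketch of using that API to walk the first chunk group (the changelog) of an on-disk bundle; dumpchangelogchunks is a hypothetical helper, not part of Mercurial:

from mercurial import changegroup
from mercurial.node import hex

def dumpchangelogchunks(path):
    fh = open(path, 'rb')
    # readbundle() picks the right decompressor from the header
    bundle = changegroup.readbundle(fh, path)
    while True:
        chunkdata = bundle.parsechunk()
        if not chunkdata:
            # an empty dict marks the zero-length chunk closing this group;
            # the manifest and filelog groups follow later in the stream
            break
        print hex(chunkdata['node']), len(chunkdata['data'])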
@@ -1,1484 +1,1487 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGSHALLOW = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35 35
36 36 # revlog index flags
37 37 REVIDX_PARENTDELTA = 1
38 38 REVIDX_PUNCHED_FLAG = 2
39 39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40 40
41 41 # amount of data read unconditionally, should be >= 4
42 42 # when not inline: threshold for using lazy index
43 43 _prereadsize = 1048576
44 44 # max size of revlog with inline data
45 45 _maxinline = 131072
46 46
47 47 RevlogError = error.RevlogError
48 48 LookupError = error.LookupError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
59 59 nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent node is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # none of the parent nodes are nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
81 81
82 82 def compress(text):
83 83 """ generate a possibly-compressed representation of text """
84 84 if not text:
85 85 return ("", text)
86 86 l = len(text)
87 87 bin = None
88 88 if l < 44:
89 89 pass
90 90 elif l > 1000000:
91 91 # zlib makes an internal copy, thus doubling memory usage for
92 92 # large files, so lets do this in pieces
93 93 z = zlib.compressobj()
94 94 p = []
95 95 pos = 0
96 96 while pos < l:
97 97 pos2 = pos + 2**20
98 98 p.append(z.compress(text[pos:pos2]))
99 99 pos = pos2
100 100 p.append(z.flush())
101 101 if sum(map(len, p)) < l:
102 102 bin = "".join(p)
103 103 else:
104 104 bin = _compress(text)
105 105 if bin is None or len(bin) > l:
106 106 if text[0] == '\0':
107 107 return ("", text)
108 108 return ('u', text)
109 109 return ("", bin)
110 110
111 111 def decompress(bin):
112 112 """ decompress the given input """
113 113 if not bin:
114 114 return bin
115 115 t = bin[0]
116 116 if t == '\0':
117 117 return bin
118 118 if t == 'x':
119 119 return _decompress(bin)
120 120 if t == 'u':
121 121 return bin[1:]
122 122 raise RevlogError(_("unknown compression type %r") % t)
123 123
124 124 class lazyparser(object):
125 125 """
126 126 this class avoids the need to parse the entirety of large indices
127 127 """
128 128
129 129 # lazyparser is not safe to use on windows if win32 extensions not
130 130 # available. it keeps file handle open, which make it not possible
131 131 # to break hardlinks on local cloned repos.
132 132
133 133 def __init__(self, dataf):
134 134 try:
135 135 size = util.fstat(dataf).st_size
136 136 except AttributeError:
137 137 size = 0
138 138 self.dataf = dataf
139 139 self.s = struct.calcsize(indexformatng)
140 140 self.datasize = size
141 141 self.l = size // self.s
142 142 self.index = [None] * self.l
143 143 self.map = {nullid: nullrev}
144 144 self.allmap = 0
145 145 self.all = 0
146 146 self.mapfind_count = 0
147 147
148 148 def loadmap(self):
149 149 """
150 150 during a commit, we need to make sure the rev being added is
151 151 not a duplicate. This requires loading the entire index,
152 152 which is fairly slow. loadmap can load up just the node map,
153 153 which takes much less time.
154 154 """
155 155 if self.allmap:
156 156 return
157 157 end = self.datasize
158 158 self.allmap = 1
159 159 cur = 0
160 160 count = 0
161 161 blocksize = self.s * 256
162 162 self.dataf.seek(0)
163 163 while cur < end:
164 164 data = self.dataf.read(blocksize)
165 165 off = 0
166 166 for x in xrange(256):
167 167 n = data[off + ngshaoffset:off + ngshaoffset + 20]
168 168 self.map[n] = count
169 169 count += 1
170 170 if count >= self.l:
171 171 break
172 172 off += self.s
173 173 cur += blocksize
174 174
175 175 def loadblock(self, blockstart, blocksize, data=None):
176 176 if self.all:
177 177 return
178 178 if data is None:
179 179 self.dataf.seek(blockstart)
180 180 if blockstart + blocksize > self.datasize:
181 181 # the revlog may have grown since we've started running,
182 182 # but we don't have space in self.index for more entries.
183 183 # limit blocksize so that we don't get too much data.
184 184 blocksize = max(self.datasize - blockstart, 0)
185 185 data = self.dataf.read(blocksize)
186 186 lend = len(data) // self.s
187 187 i = blockstart // self.s
188 188 off = 0
189 189 # lazyindex supports __delitem__
190 190 if lend > len(self.index) - i:
191 191 lend = len(self.index) - i
192 192 for x in xrange(lend):
193 193 if self.index[i + x] is None:
194 194 b = data[off : off + self.s]
195 195 self.index[i + x] = b
196 196 n = b[ngshaoffset:ngshaoffset + 20]
197 197 self.map[n] = i + x
198 198 off += self.s
199 199
200 200 def findnode(self, node):
201 201 """search backwards through the index file for a specific node"""
202 202 if self.allmap:
203 203 return None
204 204
205 205 # hg log will cause many many searches for the manifest
206 206 # nodes. After we get called a few times, just load the whole
207 207 # thing.
208 208 if self.mapfind_count > 8:
209 209 self.loadmap()
210 210 if node in self.map:
211 211 return node
212 212 return None
213 213 self.mapfind_count += 1
214 214 last = self.l - 1
215 215 while self.index[last] != None:
216 216 if last == 0:
217 217 self.all = 1
218 218 self.allmap = 1
219 219 return None
220 220 last -= 1
221 221 end = (last + 1) * self.s
222 222 blocksize = self.s * 256
223 223 while end >= 0:
224 224 start = max(end - blocksize, 0)
225 225 self.dataf.seek(start)
226 226 data = self.dataf.read(end - start)
227 227 findend = end - start
228 228 while True:
229 229 # we're searching backwards, so we have to make sure
230 230 # we don't find a changeset where this node is a parent
231 231 off = data.find(node, 0, findend)
232 232 findend = off
233 233 if off >= 0:
234 234 i = off / self.s
235 235 off = i * self.s
236 236 n = data[off + ngshaoffset:off + ngshaoffset + 20]
237 237 if n == node:
238 238 self.map[n] = i + start / self.s
239 239 return node
240 240 else:
241 241 break
242 242 end -= blocksize
243 243 return None
244 244
245 245 def loadindex(self, i=None, end=None):
246 246 if self.all:
247 247 return
248 248 all = False
249 249 if i is None:
250 250 blockstart = 0
251 251 blocksize = (65536 / self.s) * self.s
252 252 end = self.datasize
253 253 all = True
254 254 else:
255 255 if end:
256 256 blockstart = i * self.s
257 257 end = end * self.s
258 258 blocksize = end - blockstart
259 259 else:
260 260 blockstart = (i & ~1023) * self.s
261 261 blocksize = self.s * 1024
262 262 end = blockstart + blocksize
263 263 while blockstart < end:
264 264 self.loadblock(blockstart, blocksize)
265 265 blockstart += blocksize
266 266 if all:
267 267 self.all = True
268 268
269 269 class lazyindex(object):
270 270 """a lazy version of the index array"""
271 271 def __init__(self, parser):
272 272 self.p = parser
273 273 def __len__(self):
274 274 return len(self.p.index)
275 275 def load(self, pos):
276 276 if pos < 0:
277 277 pos += len(self.p.index)
278 278 self.p.loadindex(pos)
279 279 return self.p.index[pos]
280 280 def __getitem__(self, pos):
281 281 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
282 282 def __setitem__(self, pos, item):
283 283 self.p.index[pos] = _pack(indexformatng, *item)
284 284 def __delitem__(self, pos):
285 285 del self.p.index[pos]
286 286 def insert(self, pos, e):
287 287 self.p.index.insert(pos, _pack(indexformatng, *e))
288 288 def append(self, e):
289 289 self.p.index.append(_pack(indexformatng, *e))
290 290
291 291 class lazymap(object):
292 292 """a lazy version of the node map"""
293 293 def __init__(self, parser):
294 294 self.p = parser
295 295 def load(self, key):
296 296 n = self.p.findnode(key)
297 297 if n is None:
298 298 raise KeyError(key)
299 299 def __contains__(self, key):
300 300 if key in self.p.map:
301 301 return True
302 302 self.p.loadmap()
303 303 return key in self.p.map
304 304 def __iter__(self):
305 305 yield nullid
306 306 for i, ret in enumerate(self.p.index):
307 307 if not ret:
308 308 self.p.loadindex(i)
309 309 ret = self.p.index[i]
310 310 if isinstance(ret, str):
311 311 ret = _unpack(indexformatng, ret)
312 312 yield ret[7]
313 313 def __getitem__(self, key):
314 314 try:
315 315 return self.p.map[key]
316 316 except KeyError:
317 317 try:
318 318 self.load(key)
319 319 return self.p.map[key]
320 320 except KeyError:
321 321 raise KeyError("node " + hex(key))
322 322 def __setitem__(self, key, val):
323 323 self.p.map[key] = val
324 324 def __delitem__(self, key):
325 325 del self.p.map[key]
326 326
327 327 indexformatv0 = ">4l20s20s20s"
328 328 v0shaoffset = 56
329 329
330 330 class revlogoldio(object):
331 331 def __init__(self):
332 332 self.size = struct.calcsize(indexformatv0)
333 333
334 334 def parseindex(self, fp, data, inline):
335 335 s = self.size
336 336 index = []
337 337 nodemap = {nullid: nullrev}
338 338 n = off = 0
339 339 if len(data) == _prereadsize:
340 340 data += fp.read() # read the rest
341 341 l = len(data)
342 342 while off + s <= l:
343 343 cur = data[off:off + s]
344 344 off += s
345 345 e = _unpack(indexformatv0, cur)
346 346 # transform to revlogv1 format
347 347 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
348 348 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
349 349 index.append(e2)
350 350 nodemap[e[6]] = n
351 351 n += 1
352 352
353 353 return index, nodemap, None
354 354
355 355 def packentry(self, entry, node, version, rev):
356 356 if gettype(entry[0]):
357 357 raise RevlogError(_("index entry flags need RevlogNG"))
358 358 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
359 359 node(entry[5]), node(entry[6]), entry[7])
360 360 return _pack(indexformatv0, *e2)
361 361
362 362 # index ng:
363 363 # 6 bytes: offset
364 364 # 2 bytes: flags
365 365 # 4 bytes: compressed length
366 366 # 4 bytes: uncompressed length
367 367 # 4 bytes: base rev
368 368 # 4 bytes: link rev
369 369 # 4 bytes: parent 1 rev
370 370 # 4 bytes: parent 2 rev
371 371 # 32 bytes: nodeid
372 372 indexformatng = ">Qiiiiii20s12x"
373 373 ngshaoffset = 32
374 374 versionformat = ">I"
375 375
376 376 class revlogio(object):
377 377 def __init__(self):
378 378 self.size = struct.calcsize(indexformatng)
379 379
380 380 def parseindex(self, fp, data, inline):
381 381 if len(data) == _prereadsize:
382 382 if util.openhardlinks() and not inline:
383 383 # big index, let's parse it on demand
384 384 parser = lazyparser(fp)
385 385 index = lazyindex(parser)
386 386 nodemap = lazymap(parser)
387 387 e = list(index[0])
388 388 type = gettype(e[0])
389 389 e[0] = offset_type(0, type)
390 390 index[0] = e
391 391 return index, nodemap, None
392 392 else:
393 393 data += fp.read()
394 394
395 395 # call the C implementation to parse the index data
396 396 index, nodemap, cache = parsers.parse_index(data, inline)
397 397 return index, nodemap, cache
398 398
399 399 def packentry(self, entry, node, version, rev):
400 400 p = _pack(indexformatng, *entry)
401 401 if rev == 0:
402 402 p = _pack(versionformat, version) + p[4:]
403 403 return p
404 404
405 405 class revlog(object):
406 406 """
407 407 the underlying revision storage object
408 408
409 409 A revlog consists of two parts, an index and the revision data.
410 410
411 411 The index is a file with a fixed record size containing
412 412 information on each revision, including its nodeid (hash), the
413 413 nodeids of its parents, the position and offset of its data within
414 414 the data file, and the revision it's based on. Finally, each entry
415 415 contains a linkrev entry that can serve as a pointer to external
416 416 data.
417 417
418 418 The revision data itself is a linear collection of data chunks.
419 419 Each chunk represents a revision and is usually represented as a
420 420 delta against the previous chunk. To bound lookup time, runs of
421 421 deltas are limited to about 2 times the length of the original
422 422 version data. This makes retrieval of a version proportional to
423 423 its size, or O(1) relative to the number of revisions.
424 424
425 425 Both pieces of the revlog are written to in an append-only
426 426 fashion, which means we never need to rewrite a file to insert or
427 427 remove data, and can use some simple techniques to avoid the need
428 428 for locking while reading.
429 429 """
430 430 def __init__(self, opener, indexfile, shallowroot=None):
431 431 """
432 432 create a revlog object
433 433
434 434 opener is a function that abstracts the file opening operation
435 435 and can be used to implement COW semantics or the like.
436 436 """
437 437 self.indexfile = indexfile
438 438 self.datafile = indexfile[:-2] + ".d"
439 439 self.opener = opener
440 440 self._cache = None
441 441 self._chunkcache = (0, '')
442 442 self.nodemap = {nullid: nullrev}
443 443 self.index = []
444 444 self._shallowroot = shallowroot
445 445 self._parentdelta = 0
446 446
447 447 v = REVLOG_DEFAULT_VERSION
448 448 if hasattr(opener, 'options') and 'defversion' in opener.options:
449 449 v = opener.options['defversion']
450 450 if v & REVLOGNG:
451 451 v |= REVLOGNGINLINEDATA
452 452 if v & REVLOGNG and 'parentdelta' in opener.options:
453 453 self._parentdelta = 1
454 454
455 455 if shallowroot:
456 456 v |= REVLOGSHALLOW
457 457
458 458 i = ''
459 459 try:
460 460 f = self.opener(self.indexfile)
461 461 if "nonlazy" in getattr(self.opener, 'options', {}):
462 462 i = f.read()
463 463 else:
464 464 i = f.read(_prereadsize)
465 465 if len(i) > 0:
466 466 v = struct.unpack(versionformat, i[:4])[0]
467 467 except IOError, inst:
468 468 if inst.errno != errno.ENOENT:
469 469 raise
470 470
471 471 self.version = v
472 472 self._inline = v & REVLOGNGINLINEDATA
473 473 self._shallow = v & REVLOGSHALLOW
474 474 flags = v & ~0xFFFF
475 475 fmt = v & 0xFFFF
476 476 if fmt == REVLOGV0 and flags:
477 477 raise RevlogError(_("index %s unknown flags %#04x for format v0")
478 478 % (self.indexfile, flags >> 16))
479 479 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
480 480 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
481 481 % (self.indexfile, flags >> 16))
482 482 elif fmt > REVLOGNG:
483 483 raise RevlogError(_("index %s unknown format %d")
484 484 % (self.indexfile, fmt))
485 485
486 486 self._io = revlogio()
487 487 if self.version == REVLOGV0:
488 488 self._io = revlogoldio()
489 489 if i:
490 490 try:
491 491 d = self._io.parseindex(f, i, self._inline)
492 492 except (ValueError, IndexError):
493 493 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
494 494 self.index, self.nodemap, self._chunkcache = d
495 495 if not self._chunkcache:
496 496 self._chunkclear()
497 497
498 498 # add the magic null revision at -1 (if it hasn't been done already)
499 499 if (self.index == [] or isinstance(self.index, lazyindex) or
500 500 self.index[-1][7] != nullid) :
501 501 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
502 502
503 503 def _loadindex(self, start, end):
504 504 """load a block of indexes all at once from the lazy parser"""
505 505 if isinstance(self.index, lazyindex):
506 506 self.index.p.loadindex(start, end)
507 507
508 508 def _loadindexmap(self):
509 509 """loads both the map and the index from the lazy parser"""
510 510 if isinstance(self.index, lazyindex):
511 511 p = self.index.p
512 512 p.loadindex()
513 513 self.nodemap = p.map
514 514
515 515 def _loadmap(self):
516 516 """loads the map from the lazy parser"""
517 517 if isinstance(self.nodemap, lazymap):
518 518 self.nodemap.p.loadmap()
519 519 self.nodemap = self.nodemap.p.map
520 520
521 521 def tip(self):
522 522 return self.node(len(self.index) - 2)
523 523 def __len__(self):
524 524 return len(self.index) - 1
525 525 def __iter__(self):
526 526 for i in xrange(len(self)):
527 527 yield i
528 528 def rev(self, node):
529 529 try:
530 530 return self.nodemap[node]
531 531 except KeyError:
532 532 raise LookupError(node, self.indexfile, _('no node'))
533 533 def node(self, rev):
534 534 return self.index[rev][7]
535 535 def linkrev(self, rev):
536 536 return self.index[rev][4]
537 537 def parents(self, node):
538 538 i = self.index
539 539 d = i[self.rev(node)]
540 540 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
541 541 def parentrevs(self, rev):
542 542 return self.index[rev][5:7]
543 543 def start(self, rev):
544 544 return int(self.index[rev][0] >> 16)
545 545 def end(self, rev):
546 546 return self.start(rev) + self.length(rev)
547 547 def length(self, rev):
548 548 return self.index[rev][1]
549 549 def base(self, rev):
550 550 return self.index[rev][3]
551 551 def flags(self, rev):
552 552 return self.index[rev][0] & 0xFFFF
553 553 def rawsize(self, rev):
554 554 """return the length of the uncompressed text for a given revision"""
555 555 l = self.index[rev][2]
556 556 if l >= 0:
557 557 return l
558 558
559 559 t = self.revision(self.node(rev))
560 560 return len(t)
561 561 size = rawsize
562 562
563 563 def reachable(self, node, stop=None):
564 564 """return the set of all nodes ancestral to a given node, including
565 565 the node itself, stopping when stop is matched"""
566 566 reachable = set((node,))
567 567 visit = [node]
568 568 if stop:
569 569 stopn = self.rev(stop)
570 570 else:
571 571 stopn = 0
572 572 while visit:
573 573 n = visit.pop(0)
574 574 if n == stop:
575 575 continue
576 576 if n == nullid:
577 577 continue
578 578 for p in self.parents(n):
579 579 if self.rev(p) < stopn:
580 580 continue
581 581 if p not in reachable:
582 582 reachable.add(p)
583 583 visit.append(p)
584 584 return reachable
585 585
586 586 def ancestors(self, *revs):
587 587 """Generate the ancestors of 'revs' in reverse topological order.
588 588
589 589 Yield a sequence of revision numbers starting with the parents
590 590 of each revision in revs, i.e., each revision is *not* considered
591 591 an ancestor of itself. Results are in breadth-first order:
592 592 parents of each rev in revs, then parents of those, etc. Result
593 593 does not include the null revision."""
594 594 visit = list(revs)
595 595 seen = set([nullrev])
596 596 while visit:
597 597 for parent in self.parentrevs(visit.pop(0)):
598 598 if parent not in seen:
599 599 visit.append(parent)
600 600 seen.add(parent)
601 601 yield parent
602 602
603 603 def descendants(self, *revs):
604 604 """Generate the descendants of 'revs' in revision order.
605 605
606 606 Yield a sequence of revision numbers starting with a child of
607 607 some rev in revs, i.e., each revision is *not* considered a
608 608 descendant of itself. Results are ordered by revision number (a
609 609 topological sort)."""
610 610 seen = set(revs)
611 611 for i in xrange(min(revs) + 1, len(self)):
612 612 for x in self.parentrevs(i):
613 613 if x != nullrev and x in seen:
614 614 seen.add(i)
615 615 yield i
616 616 break
617 617
618 618 def findmissing(self, common=None, heads=None):
619 619 """Return the ancestors of heads that are not ancestors of common.
620 620
621 621 More specifically, return a list of nodes N such that every N
622 622 satisfies the following constraints:
623 623
624 624 1. N is an ancestor of some node in 'heads'
625 625 2. N is not an ancestor of any node in 'common'
626 626
627 627 The list is sorted by revision number, meaning it is
628 628 topologically sorted.
629 629
630 630 'heads' and 'common' are both lists of node IDs. If heads is
631 631 not supplied, uses all of the revlog's heads. If common is not
632 632 supplied, uses nullid."""
633 633 if common is None:
634 634 common = [nullid]
635 635 if heads is None:
636 636 heads = self.heads()
637 637
638 638 common = [self.rev(n) for n in common]
639 639 heads = [self.rev(n) for n in heads]
640 640
641 641 # we want the ancestors, but inclusive
642 642 has = set(self.ancestors(*common))
643 643 has.add(nullrev)
644 644 has.update(common)
645 645
646 646 # take all ancestors from heads that aren't in has
647 647 missing = set()
648 648 visit = [r for r in heads if r not in has]
649 649 while visit:
650 650 r = visit.pop(0)
651 651 if r in missing:
652 652 continue
653 653 else:
654 654 missing.add(r)
655 655 for p in self.parentrevs(r):
656 656 if p not in has:
657 657 visit.append(p)
658 658 missing = list(missing)
659 659 missing.sort()
660 660 return [self.node(r) for r in missing]
661 661
662 662 def nodesbetween(self, roots=None, heads=None):
663 663 """Return a topological path from 'roots' to 'heads'.
664 664
665 665 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
666 666 topologically sorted list of all nodes N that satisfy both of
667 667 these constraints:
668 668
669 669 1. N is a descendant of some node in 'roots'
670 670 2. N is an ancestor of some node in 'heads'
671 671
672 672 Every node is considered to be both a descendant and an ancestor
673 673 of itself, so every reachable node in 'roots' and 'heads' will be
674 674 included in 'nodes'.
675 675
676 676 'outroots' is the list of reachable nodes in 'roots', i.e., the
677 677 subset of 'roots' that is returned in 'nodes'. Likewise,
678 678 'outheads' is the subset of 'heads' that is also in 'nodes'.
679 679
680 680 'roots' and 'heads' are both lists of node IDs. If 'roots' is
681 681 unspecified, uses nullid as the only root. If 'heads' is
682 682 unspecified, uses list of all of the revlog's heads."""
683 683 nonodes = ([], [], [])
684 684 if roots is not None:
685 685 roots = list(roots)
686 686 if not roots:
687 687 return nonodes
688 688 lowestrev = min([self.rev(n) for n in roots])
689 689 else:
690 690 roots = [nullid] # Everybody's a descendent of nullid
691 691 lowestrev = nullrev
692 692 if (lowestrev == nullrev) and (heads is None):
693 693 # We want _all_ the nodes!
694 694 return ([self.node(r) for r in self], [nullid], list(self.heads()))
695 695 if heads is None:
696 696 # All nodes are ancestors, so the latest ancestor is the last
697 697 # node.
698 698 highestrev = len(self) - 1
699 699 # Set ancestors to None to signal that every node is an ancestor.
700 700 ancestors = None
701 701 # Set heads to an empty dictionary for later discovery of heads
702 702 heads = {}
703 703 else:
704 704 heads = list(heads)
705 705 if not heads:
706 706 return nonodes
707 707 ancestors = set()
708 708 # Turn heads into a dictionary so we can remove 'fake' heads.
709 709 # Also, later we will be using it to filter out the heads we can't
710 710 # find from roots.
711 711 heads = dict.fromkeys(heads, 0)
712 712 # Start at the top and keep marking parents until we're done.
713 713 nodestotag = set(heads)
714 714 # Remember where the top was so we can use it as a limit later.
715 715 highestrev = max([self.rev(n) for n in nodestotag])
716 716 while nodestotag:
717 717 # grab a node to tag
718 718 n = nodestotag.pop()
719 719 # Never tag nullid
720 720 if n == nullid:
721 721 continue
722 722 # A node's revision number represents its place in a
723 723 # topologically sorted list of nodes.
724 724 r = self.rev(n)
725 725 if r >= lowestrev:
726 726 if n not in ancestors:
727 727 # If we are possibly a descendent of one of the roots
728 728 # and we haven't already been marked as an ancestor
729 729 ancestors.add(n) # Mark as ancestor
730 730 # Add non-nullid parents to list of nodes to tag.
731 731 nodestotag.update([p for p in self.parents(n) if
732 732 p != nullid])
733 733 elif n in heads: # We've seen it before, is it a fake head?
734 734 # So it is, real heads should not be the ancestors of
735 735 # any other heads.
736 736 heads.pop(n)
737 737 if not ancestors:
738 738 return nonodes
739 739 # Now that we have our set of ancestors, we want to remove any
740 740 # roots that are not ancestors.
741 741
742 742 # If one of the roots was nullid, everything is included anyway.
743 743 if lowestrev > nullrev:
744 744 # But, since we weren't, let's recompute the lowest rev to not
745 745 # include roots that aren't ancestors.
746 746
747 747 # Filter out roots that aren't ancestors of heads
748 748 roots = [n for n in roots if n in ancestors]
749 749 # Recompute the lowest revision
750 750 if roots:
751 751 lowestrev = min([self.rev(n) for n in roots])
752 752 else:
753 753 # No more roots? Return empty list
754 754 return nonodes
755 755 else:
756 756 # We are descending from nullid, and don't need to care about
757 757 # any other roots.
758 758 lowestrev = nullrev
759 759 roots = [nullid]
760 760 # Transform our roots list into a set.
761 761 descendents = set(roots)
762 762 # Also, keep the original roots so we can filter out roots that aren't
763 763 # 'real' roots (i.e. are descended from other roots).
764 764 roots = descendents.copy()
765 765 # Our topologically sorted list of output nodes.
766 766 orderedout = []
767 767 # Don't start at nullid since we don't want nullid in our output list,
768 768 # and if nullid shows up in descendents, empty parents will look like
769 769 # they're descendents.
770 770 for r in xrange(max(lowestrev, 0), highestrev + 1):
771 771 n = self.node(r)
772 772 isdescendent = False
773 773 if lowestrev == nullrev: # Everybody is a descendent of nullid
774 774 isdescendent = True
775 775 elif n in descendents:
776 776 # n is already a descendent
777 777 isdescendent = True
778 778 # This check only needs to be done here because all the roots
779 779 # will start being marked is descendents before the loop.
780 780 if n in roots:
781 781 # If n was a root, check if it's a 'real' root.
782 782 p = tuple(self.parents(n))
783 783 # If any of its parents are descendents, it's not a root.
784 784 if (p[0] in descendents) or (p[1] in descendents):
785 785 roots.remove(n)
786 786 else:
787 787 p = tuple(self.parents(n))
788 788 # A node is a descendent if either of its parents are
789 789 # descendents. (We seeded the dependents list with the roots
790 790 # up there, remember?)
791 791 if (p[0] in descendents) or (p[1] in descendents):
792 792 descendents.add(n)
793 793 isdescendent = True
794 794 if isdescendent and ((ancestors is None) or (n in ancestors)):
795 795 # Only include nodes that are both descendents and ancestors.
796 796 orderedout.append(n)
797 797 if (ancestors is not None) and (n in heads):
798 798 # We're trying to figure out which heads are reachable
799 799 # from roots.
800 800 # Mark this head as having been reached
801 801 heads[n] = 1
802 802 elif ancestors is None:
803 803 # Otherwise, we're trying to discover the heads.
804 804 # Assume this is a head because if it isn't, the next step
805 805 # will eventually remove it.
806 806 heads[n] = 1
807 807 # But, obviously its parents aren't.
808 808 for p in self.parents(n):
809 809 heads.pop(p, None)
810 810 heads = [n for n in heads.iterkeys() if heads[n] != 0]
811 811 roots = list(roots)
812 812 assert orderedout
813 813 assert roots
814 814 assert heads
815 815 return (orderedout, roots, heads)
816 816
817 817 def heads(self, start=None, stop=None):
818 818 """return the list of all nodes that have no children
819 819
820 820 if start is specified, only heads that are descendants of
821 821 start will be returned
822 822 if stop is specified, it will consider all the revs from stop
823 823 as if they had no children
824 824 """
825 825 if start is None and stop is None:
826 826 count = len(self)
827 827 if not count:
828 828 return [nullid]
829 829 ishead = [1] * (count + 1)
830 830 index = self.index
831 831 for r in xrange(count):
832 832 e = index[r]
833 833 ishead[e[5]] = ishead[e[6]] = 0
834 834 return [self.node(r) for r in xrange(count) if ishead[r]]
835 835
836 836 if start is None:
837 837 start = nullid
838 838 if stop is None:
839 839 stop = []
840 840 stoprevs = set([self.rev(n) for n in stop])
841 841 startrev = self.rev(start)
842 842 reachable = set((startrev,))
843 843 heads = set((startrev,))
844 844
845 845 parentrevs = self.parentrevs
846 846 for r in xrange(startrev + 1, len(self)):
847 847 for p in parentrevs(r):
848 848 if p in reachable:
849 849 if r not in stoprevs:
850 850 reachable.add(r)
851 851 heads.add(r)
852 852 if p in heads and p not in stoprevs:
853 853 heads.remove(p)
854 854
855 855 return [self.node(r) for r in heads]
856 856
857 857 def children(self, node):
858 858 """find the children of a given node"""
859 859 c = []
860 860 p = self.rev(node)
861 861 for r in range(p + 1, len(self)):
862 862 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
863 863 if prevs:
864 864 for pr in prevs:
865 865 if pr == p:
866 866 c.append(self.node(r))
867 867 elif p == nullrev:
868 868 c.append(self.node(r))
869 869 return c
870 870
871 871 def descendant(self, start, end):
872 872 for i in self.descendants(start):
873 873 if i == end:
874 874 return True
875 875 elif i > end:
876 876 break
877 877 return False
878 878
879 879 def ancestor(self, a, b):
880 880 """calculate the least common ancestor of nodes a and b"""
881 881
882 882 # fast path, check if it is a descendant
883 883 a, b = self.rev(a), self.rev(b)
884 884 start, end = sorted((a, b))
885 885 if self.descendant(start, end):
886 886 return self.node(start)
887 887
888 888 def parents(rev):
889 889 return [p for p in self.parentrevs(rev) if p != nullrev]
890 890
891 891 c = ancestor.ancestor(a, b, parents)
892 892 if c is None:
893 893 return nullid
894 894
895 895 return self.node(c)
896 896
897 897 def _match(self, id):
898 898 if isinstance(id, (long, int)):
899 899 # rev
900 900 return self.node(id)
901 901 if len(id) == 20:
902 902 # possibly a binary node
903 903 # odds of a binary node being all hex in ASCII are 1 in 10**25
904 904 try:
905 905 node = id
906 906 self.rev(node) # quick search the index
907 907 return node
908 908 except LookupError:
909 909 pass # may be partial hex id
910 910 try:
911 911 # str(rev)
912 912 rev = int(id)
913 913 if str(rev) != id:
914 914 raise ValueError
915 915 if rev < 0:
916 916 rev = len(self) + rev
917 917 if rev < 0 or rev >= len(self):
918 918 raise ValueError
919 919 return self.node(rev)
920 920 except (ValueError, OverflowError):
921 921 pass
922 922 if len(id) == 40:
923 923 try:
924 924 # a full hex nodeid?
925 925 node = bin(id)
926 926 self.rev(node)
927 927 return node
928 928 except (TypeError, LookupError):
929 929 pass
930 930
931 931 def _partialmatch(self, id):
932 932 if len(id) < 40:
933 933 try:
934 934 # hex(node)[:...]
935 935 l = len(id) // 2 # grab an even number of digits
936 936 bin_id = bin(id[:l * 2])
937 937 nl = [n for n in self.nodemap if n[:l] == bin_id]
938 938 nl = [n for n in nl if hex(n).startswith(id)]
939 939 if len(nl) > 0:
940 940 if len(nl) == 1:
941 941 return nl[0]
942 942 raise LookupError(id, self.indexfile,
943 943 _('ambiguous identifier'))
944 944 return None
945 945 except TypeError:
946 946 pass
947 947
948 948 def lookup(self, id):
949 949 """locate a node based on:
950 950 - revision number or str(revision number)
951 951 - nodeid or subset of hex nodeid
952 952 """
953 953 n = self._match(id)
954 954 if n is not None:
955 955 return n
956 956 n = self._partialmatch(id)
957 957 if n:
958 958 return n
959 959
960 960 raise LookupError(id, self.indexfile, _('no match found'))
961 961
962 962 def cmp(self, node, text):
963 963 """compare text with a given file revision
964 964
965 965 returns True if text is different than what is stored.
966 966 """
967 967 p1, p2 = self.parents(node)
968 968 return hash(text, p1, p2) != node
969 969
970 970 def _addchunk(self, offset, data):
971 971 o, d = self._chunkcache
972 972 # try to add to existing cache
973 973 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
974 974 self._chunkcache = o, d + data
975 975 else:
976 976 self._chunkcache = offset, data
977 977
978 978 def _loadchunk(self, offset, length):
979 979 if self._inline:
980 980 df = self.opener(self.indexfile)
981 981 else:
982 982 df = self.opener(self.datafile)
983 983
984 984 readahead = max(65536, length)
985 985 df.seek(offset)
986 986 d = df.read(readahead)
987 987 self._addchunk(offset, d)
988 988 if readahead > length:
989 989 return d[:length]
990 990 return d
991 991
992 992 def _getchunk(self, offset, length):
993 993 o, d = self._chunkcache
994 994 l = len(d)
995 995
996 996 # is it in the cache?
997 997 cachestart = offset - o
998 998 cacheend = cachestart + length
999 999 if cachestart >= 0 and cacheend <= l:
1000 1000 if cachestart == 0 and cacheend == l:
1001 1001 return d # avoid a copy
1002 1002 return d[cachestart:cacheend]
1003 1003
1004 1004 return self._loadchunk(offset, length)
1005 1005
1006 1006 def _chunkraw(self, startrev, endrev):
1007 1007 start = self.start(startrev)
1008 1008 length = self.end(endrev) - start
1009 1009 if self._inline:
1010 1010 start += (startrev + 1) * self._io.size
1011 1011 return self._getchunk(start, length)
1012 1012
1013 1013 def _chunk(self, rev):
1014 1014 return decompress(self._chunkraw(rev, rev))
1015 1015
1016 1016 def _chunkclear(self):
1017 1017 self._chunkcache = (0, '')
1018 1018
1019 1019 def deltaparent(self, rev):
1020 1020 """return previous revision or parentrev according to flags"""
1021 1021 if self.flags(rev) & REVIDX_PARENTDELTA:
1022 1022 return self.parentrevs(rev)[0]
1023 1023 else:
1024 1024 return rev - 1
1025 1025
1026 1026 def revdiff(self, rev1, rev2):
1027 1027 """return or calculate a delta between two revisions"""
1028 1028 if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
1029 1029 return self._chunk(rev2)
1030 1030
1031 1031 return mdiff.textdiff(self.revision(self.node(rev1)),
1032 1032 self.revision(self.node(rev2)))
1033 1033
1034 1034 def revision(self, node):
1035 1035 """return an uncompressed revision of a given node"""
1036 1036 cachedrev = None
1037 1037 if node == nullid:
1038 1038 return ""
1039 1039 if self._cache:
1040 1040 if self._cache[0] == node:
1041 1041 return self._cache[2]
1042 1042 cachedrev = self._cache[1]
1043 1043
1044 1044 # look up what we need to read
1045 1045 text = None
1046 1046 rev = self.rev(node)
1047 1047 base = self.base(rev)
1048 1048
1049 1049 # check rev flags
1050 1050 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1051 1051 raise RevlogError(_('incompatible revision flag %x') %
1052 1052 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1053 1053
1054 1054 # build delta chain
1055 1055 self._loadindex(base, rev + 1)
1056 1056 chain = []
1057 1057 index = self.index # for performance
1058 1058 iterrev = rev
1059 1059 e = index[iterrev]
1060 1060 while iterrev != base and iterrev != cachedrev:
1061 1061 chain.append(iterrev)
1062 1062 if e[0] & REVIDX_PARENTDELTA:
1063 1063 iterrev = e[5]
1064 1064 else:
1065 1065 iterrev -= 1
1066 1066 e = index[iterrev]
1067 1067 chain.reverse()
1068 1068 base = iterrev
1069 1069
1070 1070 if iterrev == cachedrev:
1071 1071 # cache hit
1072 1072 text = self._cache[2]
1073 1073
1074 1074 # drop cache to save memory
1075 1075 self._cache = None
1076 1076
1077 1077 self._chunkraw(base, rev)
1078 1078 if text is None:
1079 1079 text = self._chunk(base)
1080 1080
1081 1081 bins = [self._chunk(r) for r in chain]
1082 1082 text = mdiff.patches(text, bins)
1083 1083 p1, p2 = self.parents(node)
1084 1084 if (node != hash(text, p1, p2) and
1085 1085 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
1086 1086 raise RevlogError(_("integrity check failed on %s:%d")
1087 1087 % (self.indexfile, rev))
1088 1088
1089 1089 self._cache = (node, rev, text)
1090 1090 return text
1091 1091
1092 1092 def checkinlinesize(self, tr, fp=None):
1093 1093 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1094 1094 return
1095 1095
1096 1096 trinfo = tr.find(self.indexfile)
1097 1097 if trinfo is None:
1098 1098 raise RevlogError(_("%s not found in the transaction")
1099 1099 % self.indexfile)
1100 1100
1101 1101 trindex = trinfo[2]
1102 1102 dataoff = self.start(trindex)
1103 1103
1104 1104 tr.add(self.datafile, dataoff)
1105 1105
1106 1106 if fp:
1107 1107 fp.flush()
1108 1108 fp.close()
1109 1109
1110 1110 df = self.opener(self.datafile, 'w')
1111 1111 try:
1112 1112 for r in self:
1113 1113 df.write(self._chunkraw(r, r))
1114 1114 finally:
1115 1115 df.close()
1116 1116
1117 1117 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1118 1118 self.version &= ~(REVLOGNGINLINEDATA)
1119 1119 self._inline = False
1120 1120 for i in self:
1121 1121 e = self._io.packentry(self.index[i], self.node, self.version, i)
1122 1122 fp.write(e)
1123 1123
1124 1124 # if we don't call rename, the temp file will never replace the
1125 1125 # real index
1126 1126 fp.rename()
1127 1127
1128 1128 tr.replace(self.indexfile, trindex * self._io.size)
1129 1129 self._chunkclear()
1130 1130
1131 1131 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
1132 1132 """add a revision to the log
1133 1133
1134 1134 text - the revision data to add
1135 1135 transaction - the transaction object used for rollback
1136 1136 link - the linkrev data to add
1137 1137 p1, p2 - the parent nodeids of the revision
1138 1138 cachedelta - an optional precomputed delta
1139 1139 """
1140 1140 node = hash(text, p1, p2)
1141 1141 if (node in self.nodemap and
1142 1142 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1143 1143 return node
1144 1144
1145 1145 dfh = None
1146 1146 if not self._inline:
1147 1147 dfh = self.opener(self.datafile, "a")
1148 1148 ifh = self.opener(self.indexfile, "a+")
1149 1149 try:
1150 1150 return self._addrevision(node, text, transaction, link, p1, p2,
1151 1151 cachedelta, ifh, dfh)
1152 1152 finally:
1153 1153 if dfh:
1154 1154 dfh.close()
1155 1155 ifh.close()
1156 1156
1157 1157 def _addrevision(self, node, text, transaction, link, p1, p2,
1158 1158 cachedelta, ifh, dfh):
1159 1159 curr = len(self)
1160 1160 prev = curr - 1
1161 1161 base = curr
1162 1162 offset = self.end(prev)
1163 1163 flags = 0
1164 1164 d = None
1165 1165
1166 1166 if self._parentdelta:
1167 1167 deltarev, deltanode = self.rev(p1), p1
1168 1168 flags = REVIDX_PARENTDELTA
1169 1169 else:
1170 1170 deltarev, deltanode = prev, self.node(prev)
1171 1171
1172 1172 # should we try to build a delta?
1173 1173 if deltarev != nullrev:
1174 1174 # can we use the cached delta?
1175 1175 if cachedelta:
1176 1176 cacherev, d = cachedelta
1177 1177 if cacherev != deltarev:
1178 1178 d = None
1179 1179 if d is None:
1180 1180 ptext = self.revision(deltanode)
1181 1181 d = mdiff.textdiff(ptext, text)
1182 1182 data = compress(d)
1183 1183 l = len(data[1]) + len(data[0])
1184 1184 base = self.base(deltarev)
1185 1185 dist = l + offset - self.start(base)
1186 1186
1187 1187 # full versions are inserted when the needed deltas
1188 1188 # become comparable to the uncompressed text
1189 1189 # or the base revision is punched
1190 1190 if (d is None or dist > len(text) * 2 or
1191 1191 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1192 1192 data = compress(text)
1193 1193 l = len(data[1]) + len(data[0])
1194 1194 base = curr
1195 1195
1196 1196 e = (offset_type(offset, flags), l, len(text),
1197 1197 base, link, self.rev(p1), self.rev(p2), node)
1198 1198 self.index.insert(-1, e)
1199 1199 self.nodemap[node] = curr
1200 1200
1201 1201 entry = self._io.packentry(e, self.node, self.version, curr)
1202 1202 if not self._inline:
1203 1203 transaction.add(self.datafile, offset)
1204 1204 transaction.add(self.indexfile, curr * len(entry))
1205 1205 if data[0]:
1206 1206 dfh.write(data[0])
1207 1207 dfh.write(data[1])
1208 1208 dfh.flush()
1209 1209 ifh.write(entry)
1210 1210 else:
1211 1211 offset += curr * self._io.size
1212 1212 transaction.add(self.indexfile, offset, curr)
1213 1213 ifh.write(entry)
1214 1214 ifh.write(data[0])
1215 1215 ifh.write(data[1])
1216 1216 self.checkinlinesize(transaction, ifh)
1217 1217
1218 1218 if type(text) == str: # only accept immutable objects
1219 1219 self._cache = (node, curr, text)
1220 1220 return node
1221 1221
1222 1222 def group(self, nodelist, lookup, infocollect=None, fullrev=False):
1223 1223 """Calculate a delta group, yielding a sequence of changegroup chunks
1224 1224 (strings).
1225 1225
1226 1226 Given a list of changeset revs, return a set of deltas and
1227 1227 metadata corresponding to nodes. The first delta is
1228 1228 first parent(nodelist[0]) -> nodelist[0], the receiver is
1229 1229 guaranteed to have this parent as it has all history before
1230 1230 these changesets. In the case firstparent is nullrev the
1231 1231 changegroup starts with a full revision.
1232 1232 fullrev forces the insertion of the full revision, necessary
1233 1233 in the case of shallow clones where the first parent might
1234 1234 not exist at the receiver.
1235 1235 """
1236 1236
1237 1237 revs = [self.rev(n) for n in nodelist]
1238 1238
1239 1239 # if we don't have any revisions touched by these changesets, bail
1240 1240 if not revs:
1241 1241 yield changegroup.closechunk()
1242 1242 return
1243 1243
1244 1244 # add the parent of the first rev
1245 1245 p = self.parentrevs(revs[0])[0]
1246 1246 revs.insert(0, p)
1247 1247 if p == nullrev:
1248 1248 fullrev = True
1249 1249
1250 1250 # build deltas
1251 1251 for d in xrange(len(revs) - 1):
1252 1252 a, b = revs[d], revs[d + 1]
1253 1253 nb = self.node(b)
1254 1254
1255 1255 if infocollect is not None:
1256 1256 infocollect(nb)
1257 1257
1258 1258 p = self.parents(nb)
1259 1259 meta = nb + p[0] + p[1] + lookup(nb)
1260 1260 if fullrev:
1261 1261 d = self.revision(nb)
1262 1262 meta += mdiff.trivialdiffheader(len(d))
1263 1263 fullrev = False
1264 1264 else:
1265 1265 d = self.revdiff(a, b)
1266 1266 yield changegroup.chunkheader(len(meta) + len(d))
1267 1267 yield meta
1268 1268 yield d
1269 1269
1270 1270 yield changegroup.closechunk()
1271 1271
1272 1272 def addgroup(self, bundle, linkmapper, transaction):
1273 1273 """
1274 1274 add a delta group
1275 1275
1276 1276 given a set of deltas, add them to the revision log. the
1277 1277 first delta is against its parent, which should be in our
1278 1278 log, the rest are against the previous delta.
1279 1279 """
1280 1280
1281 1281 #track the base of the current delta log
1282 1282 r = len(self)
1283 1283 t = r - 1
1284 1284 node = None
1285 1285
1286 1286 base = prev = nullrev
1287 1287 start = end = textlen = 0
1288 1288 if r:
1289 1289 end = self.end(t)
1290 1290
1291 1291 ifh = self.opener(self.indexfile, "a+")
1292 1292 isize = r * self._io.size
1293 1293 if self._inline:
1294 1294 transaction.add(self.indexfile, end + isize, r)
1295 1295 dfh = None
1296 1296 else:
1297 1297 transaction.add(self.indexfile, isize, r)
1298 1298 transaction.add(self.datafile, end)
1299 1299 dfh = self.opener(self.datafile, "a")
1300 1300
1301 1301 try:
1302 1302 # loop through our set of deltas
1303 1303 chain = None
1304 1304 while 1:
1305 chunk = bundle.chunk()
1306 if not chunk:
1305 chunkdata = bundle.parsechunk()
1306 if not chunkdata:
1307 1307 break
1308 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1308 node = chunkdata['node']
1309 p1 = chunkdata['p1']
1310 p2 = chunkdata['p2']
1311 cs = chunkdata['cs']
1312 delta = chunkdata['data']
1313
1309 1314 link = linkmapper(cs)
1310 1315 if (node in self.nodemap and
1311 1316 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1312 1317 # this can happen if two branches make the same change
1313 1318 chain = node
1314 1319 continue
1315 delta = buffer(chunk, 80)
1316 del chunk
1317 1320
1318 1321 for p in (p1, p2):
1319 1322 if not p in self.nodemap:
1320 1323 if self._shallow:
1321 1324 # add null entries for missing parents
1322 1325 if base == nullrev:
1323 1326 base = len(self)
1324 1327 e = (offset_type(end, REVIDX_PUNCHED_FLAG),
1325 1328 0, 0, base, nullrev, nullrev, nullrev, p)
1326 1329 self.index.insert(-1, e)
1327 1330 self.nodemap[p] = r
1328 1331 entry = self._io.packentry(e, self.node,
1329 1332 self.version, r)
1330 1333 ifh.write(entry)
1331 1334 t, r = r, r + 1
1332 1335 else:
1333 1336 raise LookupError(p, self.indexfile,
1334 1337 _('unknown parent'))
1335 1338
1336 1339 if not chain:
1337 1340 # retrieve the parent revision of the delta chain
1338 1341 chain = p1
1339 1342 if not chain in self.nodemap:
1340 1343 raise LookupError(chain, self.indexfile, _('unknown base'))
1341 1344
1342 1345 # full versions are inserted when the needed deltas become
1343 1346 # comparable to the uncompressed text or when the previous
1344 1347 # version is not the one we have a delta against. We use
1345 1348 # the size of the previous full rev as a proxy for the
1346 1349 # current size.
1347 1350
1348 1351 if chain == prev:
1349 1352 cdelta = compress(delta)
1350 1353 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1351 1354 textlen = mdiff.patchedsize(textlen, delta)
1352 1355
1353 1356 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1354 1357 # flush our writes here so we can read it in revision
1355 1358 if dfh:
1356 1359 dfh.flush()
1357 1360 ifh.flush()
1358 1361 text = self.revision(chain)
1359 1362 text = mdiff.patch(text, delta)
1360 1363 del delta
1361 1364 chk = self._addrevision(node, text, transaction, link,
1362 1365 p1, p2, None, ifh, dfh)
1363 1366 if not dfh and not self._inline:
1364 1367 # addrevision switched from inline to conventional
1365 1368 # reopen the index
1366 1369 dfh = self.opener(self.datafile, "a")
1367 1370 ifh = self.opener(self.indexfile, "a")
1368 1371 if chk != node:
1369 1372 raise RevlogError(_("consistency error adding group"))
1370 1373 textlen = len(text)
1371 1374 else:
1372 1375 e = (offset_type(end, 0), cdeltalen, textlen, base,
1373 1376 link, self.rev(p1), self.rev(p2), node)
1374 1377 self.index.insert(-1, e)
1375 1378 self.nodemap[node] = r
1376 1379 entry = self._io.packentry(e, self.node, self.version, r)
1377 1380 if self._inline:
1378 1381 ifh.write(entry)
1379 1382 ifh.write(cdelta[0])
1380 1383 ifh.write(cdelta[1])
1381 1384 self.checkinlinesize(transaction, ifh)
1382 1385 if not self._inline:
1383 1386 dfh = self.opener(self.datafile, "a")
1384 1387 ifh = self.opener(self.indexfile, "a")
1385 1388 else:
1386 1389 dfh.write(cdelta[0])
1387 1390 dfh.write(cdelta[1])
1388 1391 ifh.write(entry)
1389 1392
1390 1393 t, r, chain, prev = r, r + 1, node, node
1391 1394 base = self.base(t)
1392 1395 start = self.start(base)
1393 1396 end = self.end(t)
1394 1397 finally:
1395 1398 if dfh:
1396 1399 dfh.close()
1397 1400 ifh.close()
1398 1401
1399 1402 return node
1400 1403
1401 1404 def strip(self, minlink, transaction):
1402 1405 """truncate the revlog on the first revision with a linkrev >= minlink
1403 1406
1404 1407 This function is called when we're stripping revision minlink and
1405 1408 its descendants from the repository.
1406 1409
1407 1410 We have to remove all revisions with linkrev >= minlink, because
1408 1411 the equivalent changelog revisions will be renumbered after the
1409 1412 strip.
1410 1413
1411 1414 So we truncate the revlog on the first of these revisions, and
1412 1415 trust that the caller has saved the revisions that shouldn't be
1413 1416 removed and that it'll readd them after this truncation.
1414 1417 """
1415 1418 if len(self) == 0:
1416 1419 return
1417 1420
1418 1421 if isinstance(self.index, lazyindex):
1419 1422 self._loadindexmap()
1420 1423
1421 1424 for rev in self:
1422 1425 if self.index[rev][4] >= minlink:
1423 1426 break
1424 1427 else:
1425 1428 return
1426 1429
1427 1430 # first truncate the files on disk
1428 1431 end = self.start(rev)
1429 1432 if not self._inline:
1430 1433 transaction.add(self.datafile, end)
1431 1434 end = rev * self._io.size
1432 1435 else:
1433 1436 end += rev * self._io.size
1434 1437
1435 1438 transaction.add(self.indexfile, end)
1436 1439
1437 1440 # then reset internal state in memory to forget those revisions
1438 1441 self._cache = None
1439 1442 self._chunkclear()
1440 1443 for x in xrange(rev, len(self)):
1441 1444 del self.nodemap[self.node(x)]
1442 1445
1443 1446 del self.index[rev:-1]
1444 1447
1445 1448 def checksize(self):
1446 1449 expected = 0
1447 1450 if len(self):
1448 1451 expected = max(0, self.end(len(self) - 1))
1449 1452
1450 1453 try:
1451 1454 f = self.opener(self.datafile)
1452 1455 f.seek(0, 2)
1453 1456 actual = f.tell()
1454 1457 dd = actual - expected
1455 1458 except IOError, inst:
1456 1459 if inst.errno != errno.ENOENT:
1457 1460 raise
1458 1461 dd = 0
1459 1462
1460 1463 try:
1461 1464 f = self.opener(self.indexfile)
1462 1465 f.seek(0, 2)
1463 1466 actual = f.tell()
1464 1467 s = self._io.size
1465 1468 i = max(0, actual // s)
1466 1469 di = actual - (i * s)
1467 1470 if self._inline:
1468 1471 databytes = 0
1469 1472 for r in self:
1470 1473 databytes += max(0, self.length(r))
1471 1474 dd = 0
1472 1475 di = actual - len(self) * s - databytes
1473 1476 except IOError, inst:
1474 1477 if inst.errno != errno.ENOENT:
1475 1478 raise
1476 1479 di = 0
1477 1480
1478 1481 return (dd, di)
1479 1482
1480 1483 def files(self):
1481 1484 res = [self.indexfile]
1482 1485 if not self._inline:
1483 1486 res.append(self.datafile)
1484 1487 return res
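For reference, the RevlogNG index entry layout documented above (6-byte offset packed with 2 bytes of flags, compressed and uncompressed lengths, base rev, link rev, the two parent revs, and a 20-byte nodeid padded out to 32 bytes) corresponds to indexformatng = ">Qiiiiii20s12x", 64 bytes per entry. A small sketch decoding one raw entry with struct; parseentry is a hypothetical helper, and note that on disk the first 4 bytes of entry 0 hold the revlog version word instead of the offset (see revlogio.packentry):

import struct

indexformatng = ">Qiiiiii20s12x"   # 64 bytes per index entry

def parseentry(data):
    (offset_flags, complen, uncomplen, base,
     link, p1rev, p2rev, nodeid) = struct.unpack(indexformatng, data)
    return {
        'offset': offset_flags >> 16,    # byte offset of the chunk in the data file
        'flags': offset_flags & 0xFFFF,  # per-revision index flags
        'complen': complen,              # compressed chunk length
        'uncomplen': uncomplen,          # uncompressed text length
        'base': base,                    # base revision of the delta chain
        'link': link,                    # linkrev into the changelog
        'p1': p1rev, 'p2': p2rev,        # parent revisions
        'node': nodeid,                  # 20-byte nodeid (12 bytes of padding follow)
    }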