revlog: addgroup checks if incoming deltas add censored revs, sets flag bit...
Mike Edgar
r24255:4bfe9f2d default
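
This changeset teaches revlog.addgroup() to recognize, while a delta group is being applied, incoming deltas whose result is a censored file revision, and to record that by setting the REVIDX_ISCENSORED flag bit on the new index entry. A censored filelog revision carries a "censored" key in its metadata header, so (per packmeta() in the filelog.py hunk below) its raw text always begins with "\1\ncensored:". A minimal sketch of that layout, with a made-up tombstone string:

    meta = {"censored": "made-up tombstone"}           # hypothetical value
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in sorted(meta))
    rawtext = "\1\n%s\1\n" % metatext                  # packmeta(meta, "")
    assert rawtext.startswith("\1\ncensored:")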
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
@@ -1,109 +1,129 @@
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 import error, revlog
9 import re
8 import error, mdiff, revlog
9 import re, struct
10 10
11 11 _mdre = re.compile('\1\n')
12 12 def parsemeta(text):
13 13 """return (metadatadict, keylist, metadatasize)"""
14 14 # text can be buffer, so we can't use .startswith or .index
15 15 if text[:2] != '\1\n':
16 16 return None, None
17 17 s = _mdre.search(text, 2).start()
18 18 mtext = text[2:s]
19 19 meta = {}
20 20 for l in mtext.splitlines():
21 21 k, v = l.split(": ", 1)
22 22 meta[k] = v
23 23 return meta, (s + 2)
24 24
25 25 def packmeta(meta, text):
26 26 keys = sorted(meta.iterkeys())
27 27 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
28 28 return "\1\n%s\1\n%s" % (metatext, text)
29 29
30 30 def _censoredtext(text):
31 31 m, offs = parsemeta(text)
32 32 return m and "censored" in m
33 33
34 34 class filelog(revlog.revlog):
35 35 def __init__(self, opener, path):
36 36 super(filelog, self).__init__(opener,
37 37 "/".join(("data", path + ".i")))
38 38
39 39 def read(self, node):
40 40 t = self.revision(node)
41 41 if not t.startswith('\1\n'):
42 42 return t
43 43 s = t.index('\1\n', 2)
44 44 return t[s + 2:]
45 45
46 46 def add(self, text, meta, transaction, link, p1=None, p2=None):
47 47 if meta or text.startswith('\1\n'):
48 48 text = packmeta(meta, text)
49 49 return self.addrevision(text, transaction, link, p1, p2)
50 50
51 51 def renamed(self, node):
52 52 if self.parents(node)[0] != revlog.nullid:
53 53 return False
54 54 t = self.revision(node)
55 55 m = parsemeta(t)[0]
56 56 if m and "copy" in m:
57 57 return (m["copy"], revlog.bin(m["copyrev"]))
58 58 return False
59 59
60 60 def size(self, rev):
61 61 """return the size of a given revision"""
62 62
63 63 # for revisions with renames, we have to go the slow way
64 64 node = self.node(rev)
65 65 if self.renamed(node):
66 66 return len(self.read(node))
67 67 if self.iscensored(rev):
68 68 return 0
69 69
70 70 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
71 71 return super(filelog, self).size(rev)
72 72
73 73 def cmp(self, node, text):
74 74 """compare text with a given file revision
75 75
76 76 returns True if text is different than what is stored.
77 77 """
78 78
79 79 t = text
80 80 if text.startswith('\1\n'):
81 81 t = '\1\n\1\n' + text
82 82
83 83 samehashes = not super(filelog, self).cmp(node, t)
84 84 if samehashes:
85 85 return False
86 86
87 87 # censored files compare against the empty file
88 88 if self.iscensored(self.rev(node)):
89 89 return text != ''
90 90
91 91 # renaming a file produces a different hash, even if the data
92 92 # remains unchanged. Check if it's the case (slow):
93 93 if self.renamed(node):
94 94 t2 = self.read(node)
95 95 return t2 != text
96 96
97 97 return True
98 98
99 99 def checkhash(self, text, p1, p2, node, rev=None):
100 100 try:
101 101 super(filelog, self).checkhash(text, p1, p2, node, rev=rev)
102 102 except error.RevlogError:
103 103 if _censoredtext(text):
104 104 raise error.CensoredNodeError(self.indexfile, node, text)
105 105 raise
106 106
107 107 def iscensored(self, rev):
108 108 """Check if a file revision is censored."""
109 109 return self.flags(rev) & revlog.REVIDX_ISCENSORED
110
111 def _peek_iscensored(self, baserev, delta, flush):
112 """Quickly check if a delta produces a censored revision."""
113 # Fragile heuristic: unless new file meta keys are added alphabetically
114 # preceding "censored", all censored revisions are prefixed by
115 # "\1\ncensored:". A delta producing such a censored revision must be a
116 # full-replacement delta, so we inspect the first and only patch in the
117 # delta for this prefix.
118 hlen = struct.calcsize(">lll")
119 if len(delta) <= hlen:
120 return False
121
122 oldlen = self.rawsize(baserev)
123 newlen = len(delta) - hlen
124 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
125 return False
126
127 add = "\1\ncensored:"
128 addlen = len(add)
129 return newlen >= addlen and delta[hlen:hlen + addlen] == add
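
The heuristic above leans on the binary patch encoding: a delta is a sequence of fragments, each a 12-byte ">lll" header (start, end, length of new data) followed by the new data, and mdiff.replacediffheader(oldlen, newlen) amounts to a single fragment replacing the whole base text. A standalone sketch of the check, re-implementing that header under this assumption (the tombstone text is made up):

    import struct

    def replacediffheader(oldlen, newlen):          # assumed mdiff behaviour
        return struct.pack(">lll", 0, oldlen, newlen)

    newtext = "\1\ncensored: made-up tombstone\1\n"
    oldlen = 42                                     # rawsize of the delta base
    delta = replacediffheader(oldlen, len(newtext)) + newtext

    hlen = struct.calcsize(">lll")
    add = "\1\ncensored:"
    assert delta[:hlen] == replacediffheader(oldlen, len(delta) - hlen)
    assert delta[hlen:hlen + len(add)] == add       # _peek_iscensored's test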
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
@@ -1,1574 +1,1585 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
38 38 REVIDX_DEFAULT_FLAGS = 0
39 39 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
40 40
41 41 # max size of revlog with inline data
42 42 _maxinline = 131072
43 43 _chunksize = 1048576
44 44
45 45 RevlogError = error.RevlogError
46 46 LookupError = error.LookupError
47 47 CensoredNodeError = error.CensoredNodeError
48 48
49 49 def getoffset(q):
50 50 return int(q >> 16)
51 51
52 52 def gettype(q):
53 53 return int(q & 0xFFFF)
54 54
55 55 def offset_type(offset, type):
56 56 return long(long(offset) << 16 | type)
57 57
58 58 _nullhash = _sha(nullid)
59 59
60 60 def hash(text, p1, p2):
61 61 """generate a hash from the given text and its parent hashes
62 62
63 63 This hash combines both the current file contents and its history
64 64 in a manner that makes it easy to distinguish nodes with the same
65 65 content in the revision graph.
66 66 """
67 67 # As of now, if one of the parent nodes is null, it is p2
68 68 if p2 == nullid:
69 69 # deep copy of a hash is faster than creating one
70 70 s = _nullhash.copy()
71 71 s.update(p1)
72 72 else:
73 73 # none of the parent nodes are nullid
74 74 l = [p1, p2]
75 75 l.sort()
76 76 s = _sha(l[0])
77 77 s.update(l[1])
78 78 s.update(text)
79 79 return s.digest()
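# The nullid branch above is only a fast path; since nullid sorts below
# every other node, the hash is always sha1(min(p1, p2) + max(p1, p2) +
# text). A standalone sketch:
import hashlib
_nullid = b"\x00" * 20
def _nodehash(text, p1, p2):
    parents = sorted([p1, p2])
    s = hashlib.sha1(parents[0])
    s.update(parents[1])
    s.update(text)
    return s.digest()
assert len(_nodehash(b"content", _nullid, _nullid)) == 20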
80 80
81 81 def decompress(bin):
82 82 """ decompress the given input """
83 83 if not bin:
84 84 return bin
85 85 t = bin[0]
86 86 if t == '\0':
87 87 return bin
88 88 if t == 'x':
89 89 try:
90 90 return _decompress(bin)
91 91 except zlib.error, e:
92 92 raise RevlogError(_("revlog decompress error: %s") % str(e))
93 93 if t == 'u':
94 94 return bin[1:]
95 95 raise RevlogError(_("unknown compression type %r") % t)
96 96
97 97 # index v0:
98 98 # 4 bytes: offset
99 99 # 4 bytes: compressed length
100 100 # 4 bytes: base rev
101 101 # 4 bytes: link rev
102 102 # 20 bytes: parent 1 nodeid
103 103 # 20 bytes: parent 2 nodeid
104 104 # 20 bytes: nodeid
105 105 indexformatv0 = ">4l20s20s20s"
106 106 v0shaoffset = 56
107 107
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 ngshaoffset = 32
153 153 versionformat = ">I"
154 154
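# Worked sketch of one RevlogNG index entry (arbitrary values): 64 bytes
# total, with the 20-byte node padded to 32 by the trailing "12x".
assert struct.calcsize(indexformatng) == 64
e = ((1024 << 16) | 0, 90, 200, 0, 3, 2, -1, b"\xaa" * 20)
assert struct.unpack(indexformatng, struct.pack(indexformatng, *e)) == e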
155 155 class revlogio(object):
156 156 def __init__(self):
157 157 self.size = struct.calcsize(indexformatng)
158 158
159 159 def parseindex(self, data, inline):
160 160 # call the C implementation to parse the index data
161 161 index, cache = parsers.parse_index2(data, inline)
162 162 return index, getattr(index, 'nodemap', None), cache
163 163
164 164 def packentry(self, entry, node, version, rev):
165 165 p = _pack(indexformatng, *entry)
166 166 if rev == 0:
167 167 p = _pack(versionformat, version) + p[4:]
168 168 return p
169 169
170 170 class revlog(object):
171 171 """
172 172 the underlying revision storage object
173 173
174 174 A revlog consists of two parts, an index and the revision data.
175 175
176 176 The index is a file with a fixed record size containing
177 177 information on each revision, including its nodeid (hash), the
178 178 nodeids of its parents, the position and offset of its data within
179 179 the data file, and the revision it's based on. Finally, each entry
180 180 contains a linkrev entry that can serve as a pointer to external
181 181 data.
182 182
183 183 The revision data itself is a linear collection of data chunks.
184 184 Each chunk represents a revision and is usually represented as a
185 185 delta against the previous chunk. To bound lookup time, runs of
186 186 deltas are limited to about 2 times the length of the original
187 187 version data. This makes retrieval of a version proportional to
188 188 its size, or O(1) relative to the number of revisions.
189 189
190 190 Both pieces of the revlog are written to in an append-only
191 191 fashion, which means we never need to rewrite a file to insert or
192 192 remove data, and can use some simple techniques to avoid the need
193 193 for locking while reading.
194 194 """
195 195 def __init__(self, opener, indexfile):
196 196 """
197 197 create a revlog object
198 198
199 199 opener is a function that abstracts the file opening operation
200 200 and can be used to implement COW semantics or the like.
201 201 """
202 202 self.indexfile = indexfile
203 203 self.datafile = indexfile[:-2] + ".d"
204 204 self.opener = opener
205 205 self._cache = None
206 206 self._basecache = None
207 207 self._chunkcache = (0, '')
208 208 self._chunkcachesize = 65536
209 209 self._maxchainlen = None
210 210 self.index = []
211 211 self._pcache = {}
212 212 self._nodecache = {nullid: nullrev}
213 213 self._nodepos = None
214 214
215 215 v = REVLOG_DEFAULT_VERSION
216 216 opts = getattr(opener, 'options', None)
217 217 if opts is not None:
218 218 if 'revlogv1' in opts:
219 219 if 'generaldelta' in opts:
220 220 v |= REVLOGGENERALDELTA
221 221 else:
222 222 v = 0
223 223 if 'chunkcachesize' in opts:
224 224 self._chunkcachesize = opts['chunkcachesize']
225 225 if 'maxchainlen' in opts:
226 226 self._maxchainlen = opts['maxchainlen']
227 227
228 228 if self._chunkcachesize <= 0:
229 229 raise RevlogError(_('revlog chunk cache size %r is not greater '
230 230 'than 0') % self._chunkcachesize)
231 231 elif self._chunkcachesize & (self._chunkcachesize - 1):
232 232 raise RevlogError(_('revlog chunk cache size %r is not a power '
233 233 'of 2') % self._chunkcachesize)
234 234
235 235 i = ''
236 236 self._initempty = True
237 237 try:
238 238 f = self.opener(self.indexfile)
239 239 i = f.read()
240 240 f.close()
241 241 if len(i) > 0:
242 242 v = struct.unpack(versionformat, i[:4])[0]
243 243 self._initempty = False
244 244 except IOError, inst:
245 245 if inst.errno != errno.ENOENT:
246 246 raise
247 247
248 248 self.version = v
249 249 self._inline = v & REVLOGNGINLINEDATA
250 250 self._generaldelta = v & REVLOGGENERALDELTA
251 251 flags = v & ~0xFFFF
252 252 fmt = v & 0xFFFF
253 253 if fmt == REVLOGV0 and flags:
254 254 raise RevlogError(_("index %s unknown flags %#04x for format v0")
255 255 % (self.indexfile, flags >> 16))
256 256 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
257 257 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
258 258 % (self.indexfile, flags >> 16))
259 259 elif fmt > REVLOGNG:
260 260 raise RevlogError(_("index %s unknown format %d")
261 261 % (self.indexfile, fmt))
262 262
263 263 self._io = revlogio()
264 264 if self.version == REVLOGV0:
265 265 self._io = revlogoldio()
266 266 try:
267 267 d = self._io.parseindex(i, self._inline)
268 268 except (ValueError, IndexError):
269 269 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
270 270 self.index, nodemap, self._chunkcache = d
271 271 if nodemap is not None:
272 272 self.nodemap = self._nodecache = nodemap
273 273 if not self._chunkcache:
274 274 self._chunkclear()
275 275 # revnum -> (chain-length, sum-delta-length)
276 276 self._chaininfocache = {}
277 277
278 278 def tip(self):
279 279 return self.node(len(self.index) - 2)
280 280 def __contains__(self, rev):
281 281 return 0 <= rev < len(self)
282 282 def __len__(self):
283 283 return len(self.index) - 1
284 284 def __iter__(self):
285 285 return iter(xrange(len(self)))
286 286 def revs(self, start=0, stop=None):
287 287 """iterate over all rev in this revlog (from start to stop)"""
288 288 step = 1
289 289 if stop is not None:
290 290 if start > stop:
291 291 step = -1
292 292 stop += step
293 293 else:
294 294 stop = len(self)
295 295 return xrange(start, stop, step)
296 296
297 297 @util.propertycache
298 298 def nodemap(self):
299 299 self.rev(self.node(0))
300 300 return self._nodecache
301 301
302 302 def hasnode(self, node):
303 303 try:
304 304 self.rev(node)
305 305 return True
306 306 except KeyError:
307 307 return False
308 308
309 309 def clearcaches(self):
310 310 try:
311 311 self._nodecache.clearcaches()
312 312 except AttributeError:
313 313 self._nodecache = {nullid: nullrev}
314 314 self._nodepos = None
315 315
316 316 def rev(self, node):
317 317 try:
318 318 return self._nodecache[node]
319 319 except TypeError:
320 320 raise
321 321 except RevlogError:
322 322 # parsers.c radix tree lookup failed
323 323 raise LookupError(node, self.indexfile, _('no node'))
324 324 except KeyError:
325 325 # pure python cache lookup failed
326 326 n = self._nodecache
327 327 i = self.index
328 328 p = self._nodepos
329 329 if p is None:
330 330 p = len(i) - 2
331 331 for r in xrange(p, -1, -1):
332 332 v = i[r][7]
333 333 n[v] = r
334 334 if v == node:
335 335 self._nodepos = r - 1
336 336 return r
337 337 raise LookupError(node, self.indexfile, _('no node'))
338 338
339 339 def node(self, rev):
340 340 return self.index[rev][7]
341 341 def linkrev(self, rev):
342 342 return self.index[rev][4]
343 343 def parents(self, node):
344 344 i = self.index
345 345 d = i[self.rev(node)]
346 346 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
347 347 def parentrevs(self, rev):
348 348 return self.index[rev][5:7]
349 349 def start(self, rev):
350 350 return int(self.index[rev][0] >> 16)
351 351 def end(self, rev):
352 352 return self.start(rev) + self.length(rev)
353 353 def length(self, rev):
354 354 return self.index[rev][1]
355 355 def chainbase(self, rev):
356 356 index = self.index
357 357 base = index[rev][3]
358 358 while base != rev:
359 359 rev = base
360 360 base = index[rev][3]
361 361 return base
362 362 def chainlen(self, rev):
363 363 return self._chaininfo(rev)[0]
364 364
365 365 def _chaininfo(self, rev):
366 366 chaininfocache = self._chaininfocache
367 367 if rev in chaininfocache:
368 368 return chaininfocache[rev]
369 369 index = self.index
370 370 generaldelta = self._generaldelta
371 371 iterrev = rev
372 372 e = index[iterrev]
373 373 clen = 0
374 374 compresseddeltalen = 0
375 375 while iterrev != e[3]:
376 376 clen += 1
377 377 compresseddeltalen += e[1]
378 378 if generaldelta:
379 379 iterrev = e[3]
380 380 else:
381 381 iterrev -= 1
382 382 if iterrev in chaininfocache:
383 383 t = chaininfocache[iterrev]
384 384 clen += t[0]
385 385 compresseddeltalen += t[1]
386 386 break
387 387 e = index[iterrev]
388 388 else:
389 389 # Add text length of base since decompressing that also takes
390 390 # work. For cache hits the length is already included.
391 391 compresseddeltalen += e[1]
392 392 r = (clen, compresseddeltalen)
393 393 chaininfocache[rev] = r
394 394 return r
395 395
396 396 def flags(self, rev):
397 397 return self.index[rev][0] & 0xFFFF
398 398 def rawsize(self, rev):
399 399 """return the length of the uncompressed text for a given revision"""
400 400 l = self.index[rev][2]
401 401 if l >= 0:
402 402 return l
403 403
404 404 t = self.revision(self.node(rev))
405 405 return len(t)
406 406 size = rawsize
407 407
408 408 def ancestors(self, revs, stoprev=0, inclusive=False):
409 409 """Generate the ancestors of 'revs' in reverse topological order.
410 410 Does not generate revs lower than stoprev.
411 411
412 412 See the documentation for ancestor.lazyancestors for more details."""
413 413
414 414 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
415 415 inclusive=inclusive)
416 416
417 417 def descendants(self, revs):
418 418 """Generate the descendants of 'revs' in revision order.
419 419
420 420 Yield a sequence of revision numbers starting with a child of
421 421 some rev in revs, i.e., each revision is *not* considered a
422 422 descendant of itself. Results are ordered by revision number (a
423 423 topological sort)."""
424 424 first = min(revs)
425 425 if first == nullrev:
426 426 for i in self:
427 427 yield i
428 428 return
429 429
430 430 seen = set(revs)
431 431 for i in self.revs(start=first + 1):
432 432 for x in self.parentrevs(i):
433 433 if x != nullrev and x in seen:
434 434 seen.add(i)
435 435 yield i
436 436 break
437 437
438 438 def findcommonmissing(self, common=None, heads=None):
439 439 """Return a tuple of the ancestors of common and the ancestors of heads
440 440 that are not ancestors of common. In revset terminology, we return the
441 441 tuple:
442 442
443 443 ::common, (::heads) - (::common)
444 444
445 445 The list is sorted by revision number, meaning it is
446 446 topologically sorted.
447 447
448 448 'heads' and 'common' are both lists of node IDs. If heads is
449 449 not supplied, uses all of the revlog's heads. If common is not
450 450 supplied, uses nullid."""
451 451 if common is None:
452 452 common = [nullid]
453 453 if heads is None:
454 454 heads = self.heads()
455 455
456 456 common = [self.rev(n) for n in common]
457 457 heads = [self.rev(n) for n in heads]
458 458
459 459 # we want the ancestors, but inclusive
460 460 class lazyset(object):
461 461 def __init__(self, lazyvalues):
462 462 self.addedvalues = set()
463 463 self.lazyvalues = lazyvalues
464 464
465 465 def __contains__(self, value):
466 466 return value in self.addedvalues or value in self.lazyvalues
467 467
468 468 def __iter__(self):
469 469 added = self.addedvalues
470 470 for r in added:
471 471 yield r
472 472 for r in self.lazyvalues:
473 473 if not r in added:
474 474 yield r
475 475
476 476 def add(self, value):
477 477 self.addedvalues.add(value)
478 478
479 479 def update(self, values):
480 480 self.addedvalues.update(values)
481 481
482 482 has = lazyset(self.ancestors(common))
483 483 has.add(nullrev)
484 484 has.update(common)
485 485
486 486 # take all ancestors from heads that aren't in has
487 487 missing = set()
488 488 visit = util.deque(r for r in heads if r not in has)
489 489 while visit:
490 490 r = visit.popleft()
491 491 if r in missing:
492 492 continue
493 493 else:
494 494 missing.add(r)
495 495 for p in self.parentrevs(r):
496 496 if p not in has:
497 497 visit.append(p)
498 498 missing = list(missing)
499 499 missing.sort()
500 500 return has, [self.node(r) for r in missing]
501 501
502 502 def incrementalmissingrevs(self, common=None):
503 503 """Return an object that can be used to incrementally compute the
504 504 revision numbers of the ancestors of arbitrary sets that are not
505 505 ancestors of common. This is an ancestor.incrementalmissingancestors
506 506 object.
507 507
508 508 'common' is a list of revision numbers. If common is not supplied, uses
509 509 nullrev.
510 510 """
511 511 if common is None:
512 512 common = [nullrev]
513 513
514 514 return ancestor.incrementalmissingancestors(self.parentrevs, common)
515 515
516 516 def findmissingrevs(self, common=None, heads=None):
517 517 """Return the revision numbers of the ancestors of heads that
518 518 are not ancestors of common.
519 519
520 520 More specifically, return a list of revision numbers corresponding to
521 521 nodes N such that every N satisfies the following constraints:
522 522
523 523 1. N is an ancestor of some node in 'heads'
524 524 2. N is not an ancestor of any node in 'common'
525 525
526 526 The list is sorted by revision number, meaning it is
527 527 topologically sorted.
528 528
529 529 'heads' and 'common' are both lists of revision numbers. If heads is
530 530 not supplied, uses all of the revlog's heads. If common is not
531 531 supplied, uses nullid."""
532 532 if common is None:
533 533 common = [nullrev]
534 534 if heads is None:
535 535 heads = self.headrevs()
536 536
537 537 inc = self.incrementalmissingrevs(common=common)
538 538 return inc.missingancestors(heads)
539 539
540 540 def findmissing(self, common=None, heads=None):
541 541 """Return the ancestors of heads that are not ancestors of common.
542 542
543 543 More specifically, return a list of nodes N such that every N
544 544 satisfies the following constraints:
545 545
546 546 1. N is an ancestor of some node in 'heads'
547 547 2. N is not an ancestor of any node in 'common'
548 548
549 549 The list is sorted by revision number, meaning it is
550 550 topologically sorted.
551 551
552 552 'heads' and 'common' are both lists of node IDs. If heads is
553 553 not supplied, uses all of the revlog's heads. If common is not
554 554 supplied, uses nullid."""
555 555 if common is None:
556 556 common = [nullid]
557 557 if heads is None:
558 558 heads = self.heads()
559 559
560 560 common = [self.rev(n) for n in common]
561 561 heads = [self.rev(n) for n in heads]
562 562
563 563 inc = self.incrementalmissingrevs(common=common)
564 564 return [self.node(r) for r in inc.missingancestors(heads)]
565 565
566 566 def nodesbetween(self, roots=None, heads=None):
567 567 """Return a topological path from 'roots' to 'heads'.
568 568
569 569 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
570 570 topologically sorted list of all nodes N that satisfy both of
571 571 these constraints:
572 572
573 573 1. N is a descendant of some node in 'roots'
574 574 2. N is an ancestor of some node in 'heads'
575 575
576 576 Every node is considered to be both a descendant and an ancestor
577 577 of itself, so every reachable node in 'roots' and 'heads' will be
578 578 included in 'nodes'.
579 579
580 580 'outroots' is the list of reachable nodes in 'roots', i.e., the
581 581 subset of 'roots' that is returned in 'nodes'. Likewise,
582 582 'outheads' is the subset of 'heads' that is also in 'nodes'.
583 583
584 584 'roots' and 'heads' are both lists of node IDs. If 'roots' is
585 585 unspecified, uses nullid as the only root. If 'heads' is
586 586 unspecified, uses list of all of the revlog's heads."""
587 587 nonodes = ([], [], [])
588 588 if roots is not None:
589 589 roots = list(roots)
590 590 if not roots:
591 591 return nonodes
592 592 lowestrev = min([self.rev(n) for n in roots])
593 593 else:
594 594 roots = [nullid] # Everybody's a descendant of nullid
595 595 lowestrev = nullrev
596 596 if (lowestrev == nullrev) and (heads is None):
597 597 # We want _all_ the nodes!
598 598 return ([self.node(r) for r in self], [nullid], list(self.heads()))
599 599 if heads is None:
600 600 # All nodes are ancestors, so the latest ancestor is the last
601 601 # node.
602 602 highestrev = len(self) - 1
603 603 # Set ancestors to None to signal that every node is an ancestor.
604 604 ancestors = None
605 605 # Set heads to an empty dictionary for later discovery of heads
606 606 heads = {}
607 607 else:
608 608 heads = list(heads)
609 609 if not heads:
610 610 return nonodes
611 611 ancestors = set()
612 612 # Turn heads into a dictionary so we can remove 'fake' heads.
613 613 # Also, later we will be using it to filter out the heads we can't
614 614 # find from roots.
615 615 heads = dict.fromkeys(heads, False)
616 616 # Start at the top and keep marking parents until we're done.
617 617 nodestotag = set(heads)
618 618 # Remember where the top was so we can use it as a limit later.
619 619 highestrev = max([self.rev(n) for n in nodestotag])
620 620 while nodestotag:
621 621 # grab a node to tag
622 622 n = nodestotag.pop()
623 623 # Never tag nullid
624 624 if n == nullid:
625 625 continue
626 626 # A node's revision number represents its place in a
627 627 # topologically sorted list of nodes.
628 628 r = self.rev(n)
629 629 if r >= lowestrev:
630 630 if n not in ancestors:
631 631 # If we are possibly a descendant of one of the roots
632 632 # and we haven't already been marked as an ancestor
633 633 ancestors.add(n) # Mark as ancestor
634 634 # Add non-nullid parents to list of nodes to tag.
635 635 nodestotag.update([p for p in self.parents(n) if
636 636 p != nullid])
637 637 elif n in heads: # We've seen it before, is it a fake head?
638 638 # So it is, real heads should not be the ancestors of
639 639 # any other heads.
640 640 heads.pop(n)
641 641 if not ancestors:
642 642 return nonodes
643 643 # Now that we have our set of ancestors, we want to remove any
644 644 # roots that are not ancestors.
645 645
646 646 # If one of the roots was nullid, everything is included anyway.
647 647 if lowestrev > nullrev:
648 648 # But, since we weren't, let's recompute the lowest rev to not
649 649 # include roots that aren't ancestors.
650 650
651 651 # Filter out roots that aren't ancestors of heads
652 652 roots = [n for n in roots if n in ancestors]
653 653 # Recompute the lowest revision
654 654 if roots:
655 655 lowestrev = min([self.rev(n) for n in roots])
656 656 else:
657 657 # No more roots? Return empty list
658 658 return nonodes
659 659 else:
660 660 # We are descending from nullid, and don't need to care about
661 661 # any other roots.
662 662 lowestrev = nullrev
663 663 roots = [nullid]
664 664 # Transform our roots list into a set.
665 665 descendants = set(roots)
666 666 # Also, keep the original roots so we can filter out roots that aren't
667 667 # 'real' roots (i.e. are descended from other roots).
668 668 roots = descendants.copy()
669 669 # Our topologically sorted list of output nodes.
670 670 orderedout = []
671 671 # Don't start at nullid since we don't want nullid in our output list,
672 672 # and if nullid shows up in descendants, empty parents will look like
673 673 # they're descendants.
674 674 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
675 675 n = self.node(r)
676 676 isdescendant = False
677 677 if lowestrev == nullrev: # Everybody is a descendant of nullid
678 678 isdescendant = True
679 679 elif n in descendants:
680 680 # n is already a descendant
681 681 isdescendant = True
682 682 # This check only needs to be done here because all the roots
683 683 # will start being marked as descendants before the loop.
684 684 if n in roots:
685 685 # If n was a root, check if it's a 'real' root.
686 686 p = tuple(self.parents(n))
687 687 # If any of its parents are descendants, it's not a root.
688 688 if (p[0] in descendants) or (p[1] in descendants):
689 689 roots.remove(n)
690 690 else:
691 691 p = tuple(self.parents(n))
692 692 # A node is a descendant if either of its parents is a
693 693 # descendant. (We seeded the descendants set with the roots
694 694 # up there, remember?)
695 695 if (p[0] in descendants) or (p[1] in descendants):
696 696 descendants.add(n)
697 697 isdescendant = True
698 698 if isdescendant and ((ancestors is None) or (n in ancestors)):
699 699 # Only include nodes that are both descendants and ancestors.
700 700 orderedout.append(n)
701 701 if (ancestors is not None) and (n in heads):
702 702 # We're trying to figure out which heads are reachable
703 703 # from roots.
704 704 # Mark this head as having been reached
705 705 heads[n] = True
706 706 elif ancestors is None:
707 707 # Otherwise, we're trying to discover the heads.
708 708 # Assume this is a head because if it isn't, the next step
709 709 # will eventually remove it.
710 710 heads[n] = True
711 711 # But, obviously its parents aren't.
712 712 for p in self.parents(n):
713 713 heads.pop(p, None)
714 714 heads = [n for n, flag in heads.iteritems() if flag]
715 715 roots = list(roots)
716 716 assert orderedout
717 717 assert roots
718 718 assert heads
719 719 return (orderedout, roots, heads)
720 720
721 721 def headrevs(self):
722 722 try:
723 723 return self.index.headrevs()
724 724 except AttributeError:
725 725 return self._headrevs()
726 726
727 727 def _headrevs(self):
728 728 count = len(self)
729 729 if not count:
730 730 return [nullrev]
731 731 # we won't iterate over filtered revs, so nobody is a head at the start
732 732 ishead = [0] * (count + 1)
733 733 index = self.index
734 734 for r in self:
735 735 ishead[r] = 1 # I may be a head
736 736 e = index[r]
737 737 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
738 738 return [r for r, val in enumerate(ishead) if val]
739 739
740 740 def heads(self, start=None, stop=None):
741 741 """return the list of all nodes that have no children
742 742
743 743 if start is specified, only heads that are descendants of
744 744 start will be returned
745 745 if stop is specified, it will consider all the revs from stop
746 746 as if they had no children
747 747 """
748 748 if start is None and stop is None:
749 749 if not len(self):
750 750 return [nullid]
751 751 return [self.node(r) for r in self.headrevs()]
752 752
753 753 if start is None:
754 754 start = nullid
755 755 if stop is None:
756 756 stop = []
757 757 stoprevs = set([self.rev(n) for n in stop])
758 758 startrev = self.rev(start)
759 759 reachable = set((startrev,))
760 760 heads = set((startrev,))
761 761
762 762 parentrevs = self.parentrevs
763 763 for r in self.revs(start=startrev + 1):
764 764 for p in parentrevs(r):
765 765 if p in reachable:
766 766 if r not in stoprevs:
767 767 reachable.add(r)
768 768 heads.add(r)
769 769 if p in heads and p not in stoprevs:
770 770 heads.remove(p)
771 771
772 772 return [self.node(r) for r in heads]
773 773
774 774 def children(self, node):
775 775 """find the children of a given node"""
776 776 c = []
777 777 p = self.rev(node)
778 778 for r in self.revs(start=p + 1):
779 779 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
780 780 if prevs:
781 781 for pr in prevs:
782 782 if pr == p:
783 783 c.append(self.node(r))
784 784 elif p == nullrev:
785 785 c.append(self.node(r))
786 786 return c
787 787
788 788 def descendant(self, start, end):
789 789 if start == nullrev:
790 790 return True
791 791 for i in self.descendants([start]):
792 792 if i == end:
793 793 return True
794 794 elif i > end:
795 795 break
796 796 return False
797 797
798 798 def commonancestorsheads(self, a, b):
799 799 """calculate all the heads of the common ancestors of nodes a and b"""
800 800 a, b = self.rev(a), self.rev(b)
801 801 try:
802 802 ancs = self.index.commonancestorsheads(a, b)
803 803 except (AttributeError, OverflowError): # C implementation failed
804 804 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
805 805 return map(self.node, ancs)
806 806
807 807 def isancestor(self, a, b):
808 808 """return True if node a is an ancestor of node b
809 809
810 810 The implementation of this is trivial but the use of
811 811 commonancestorsheads is not."""
812 812 return a in self.commonancestorsheads(a, b)
813 813
814 814 def ancestor(self, a, b):
815 815 """calculate the "best" common ancestor of nodes a and b"""
816 816
817 817 a, b = self.rev(a), self.rev(b)
818 818 try:
819 819 ancs = self.index.ancestors(a, b)
820 820 except (AttributeError, OverflowError):
821 821 ancs = ancestor.ancestors(self.parentrevs, a, b)
822 822 if ancs:
823 823 # choose a consistent winner when there's a tie
824 824 return min(map(self.node, ancs))
825 825 return nullid
826 826
827 827 def _match(self, id):
828 828 if isinstance(id, int):
829 829 # rev
830 830 return self.node(id)
831 831 if len(id) == 20:
832 832 # possibly a binary node
833 833 # odds of a binary node being all hex in ASCII are 1 in 10**25
834 834 try:
835 835 node = id
836 836 self.rev(node) # quick search the index
837 837 return node
838 838 except LookupError:
839 839 pass # may be partial hex id
840 840 try:
841 841 # str(rev)
842 842 rev = int(id)
843 843 if str(rev) != id:
844 844 raise ValueError
845 845 if rev < 0:
846 846 rev = len(self) + rev
847 847 if rev < 0 or rev >= len(self):
848 848 raise ValueError
849 849 return self.node(rev)
850 850 except (ValueError, OverflowError):
851 851 pass
852 852 if len(id) == 40:
853 853 try:
854 854 # a full hex nodeid?
855 855 node = bin(id)
856 856 self.rev(node)
857 857 return node
858 858 except (TypeError, LookupError):
859 859 pass
860 860
861 861 def _partialmatch(self, id):
862 862 try:
863 863 n = self.index.partialmatch(id)
864 864 if n and self.hasnode(n):
865 865 return n
866 866 return None
867 867 except RevlogError:
868 868 # parsers.c radix tree lookup gave multiple matches
869 869 # fall through to slow path that filters hidden revisions
870 870 pass
871 871 except (AttributeError, ValueError):
872 872 # we are pure python, or key was too short to search radix tree
873 873 pass
874 874
875 875 if id in self._pcache:
876 876 return self._pcache[id]
877 877
878 878 if len(id) < 40:
879 879 try:
880 880 # hex(node)[:...]
881 881 l = len(id) // 2 # grab an even number of digits
882 882 prefix = bin(id[:l * 2])
883 883 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
884 884 nl = [n for n in nl if hex(n).startswith(id) and
885 885 self.hasnode(n)]
886 886 if len(nl) > 0:
887 887 if len(nl) == 1:
888 888 self._pcache[id] = nl[0]
889 889 return nl[0]
890 890 raise LookupError(id, self.indexfile,
891 891 _('ambiguous identifier'))
892 892 return None
893 893 except TypeError:
894 894 pass
895 895
896 896 def lookup(self, id):
897 897 """locate a node based on:
898 898 - revision number or str(revision number)
899 899 - nodeid or subset of hex nodeid
900 900 """
901 901 n = self._match(id)
902 902 if n is not None:
903 903 return n
904 904 n = self._partialmatch(id)
905 905 if n:
906 906 return n
907 907
908 908 raise LookupError(id, self.indexfile, _('no match found'))
909 909
910 910 def cmp(self, node, text):
911 911 """compare text with a given file revision
912 912
913 913 returns True if text is different than what is stored.
914 914 """
915 915 p1, p2 = self.parents(node)
916 916 return hash(text, p1, p2) != node
917 917
918 918 def _addchunk(self, offset, data):
919 919 o, d = self._chunkcache
920 920 # try to add to existing cache
921 921 if o + len(d) == offset and len(d) + len(data) < _chunksize:
922 922 self._chunkcache = o, d + data
923 923 else:
924 924 self._chunkcache = offset, data
925 925
926 926 def _loadchunk(self, offset, length):
927 927 if self._inline:
928 928 df = self.opener(self.indexfile)
929 929 else:
930 930 df = self.opener(self.datafile)
931 931
932 932 # Cache data both forward and backward around the requested
933 933 # data, in a fixed size window. This helps speed up operations
934 934 # involving reading the revlog backwards.
935 935 cachesize = self._chunkcachesize
936 936 realoffset = offset & ~(cachesize - 1)
937 937 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
938 938 - realoffset)
939 939 df.seek(realoffset)
940 940 d = df.read(reallength)
941 941 df.close()
942 942 self._addchunk(realoffset, d)
943 943 if offset != realoffset or reallength != length:
944 944 return util.buffer(d, offset - realoffset, length)
945 945 return d
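# Worked example of the window arithmetic above (default 64k cache size,
# arbitrary 1000-byte read at offset 70000): the read widens to the
# aligned window [64k, 128k).
_cs = 65536
assert 70000 & ~(_cs - 1) == 65536                            # realoffset
assert ((70000 + 1000 + _cs) & ~(_cs - 1)) - 65536 == 65536   # reallength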
946 946
947 947 def _getchunk(self, offset, length):
948 948 o, d = self._chunkcache
949 949 l = len(d)
950 950
951 951 # is it in the cache?
952 952 cachestart = offset - o
953 953 cacheend = cachestart + length
954 954 if cachestart >= 0 and cacheend <= l:
955 955 if cachestart == 0 and cacheend == l:
956 956 return d # avoid a copy
957 957 return util.buffer(d, cachestart, cacheend - cachestart)
958 958
959 959 return self._loadchunk(offset, length)
960 960
961 961 def _chunkraw(self, startrev, endrev):
962 962 start = self.start(startrev)
963 963 end = self.end(endrev)
964 964 if self._inline:
965 965 start += (startrev + 1) * self._io.size
966 966 end += (endrev + 1) * self._io.size
967 967 length = end - start
968 968 return self._getchunk(start, length)
969 969
970 970 def _chunk(self, rev):
971 971 return decompress(self._chunkraw(rev, rev))
972 972
973 973 def _chunks(self, revs):
974 974 '''faster version of [self._chunk(rev) for rev in revs]
975 975
976 976 Assumes that revs is in ascending order.'''
977 977 if not revs:
978 978 return []
979 979 start = self.start
980 980 length = self.length
981 981 inline = self._inline
982 982 iosize = self._io.size
983 983 buffer = util.buffer
984 984
985 985 l = []
986 986 ladd = l.append
987 987
988 988 # preload the cache
989 989 try:
990 990 while True:
991 991 # ensure that the cache doesn't change out from under us
992 992 _cache = self._chunkcache
993 993 self._chunkraw(revs[0], revs[-1])
994 994 if _cache == self._chunkcache:
995 995 break
996 996 offset, data = _cache
997 997 except OverflowError:
998 998 # issue4215 - we can't cache a run of chunks greater than
999 999 # 2G on Windows
1000 1000 return [self._chunk(rev) for rev in revs]
1001 1001
1002 1002 for rev in revs:
1003 1003 chunkstart = start(rev)
1004 1004 if inline:
1005 1005 chunkstart += (rev + 1) * iosize
1006 1006 chunklength = length(rev)
1007 1007 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1008 1008
1009 1009 return l
1010 1010
1011 1011 def _chunkclear(self):
1012 1012 self._chunkcache = (0, '')
1013 1013
1014 1014 def deltaparent(self, rev):
1015 1015 """return deltaparent of the given revision"""
1016 1016 base = self.index[rev][3]
1017 1017 if base == rev:
1018 1018 return nullrev
1019 1019 elif self._generaldelta:
1020 1020 return base
1021 1021 else:
1022 1022 return rev - 1
1023 1023
1024 1024 def revdiff(self, rev1, rev2):
1025 1025 """return or calculate a delta between two revisions"""
1026 1026 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1027 1027 return str(self._chunk(rev2))
1028 1028
1029 1029 return mdiff.textdiff(self.revision(rev1),
1030 1030 self.revision(rev2))
1031 1031
1032 1032 def revision(self, nodeorrev):
1033 1033 """return an uncompressed revision of a given node or revision
1034 1034 number.
1035 1035 """
1036 1036 if isinstance(nodeorrev, int):
1037 1037 rev = nodeorrev
1038 1038 node = self.node(rev)
1039 1039 else:
1040 1040 node = nodeorrev
1041 1041 rev = None
1042 1042
1043 1043 _cache = self._cache # grab local copy of cache to avoid thread race
1044 1044 cachedrev = None
1045 1045 if node == nullid:
1046 1046 return ""
1047 1047 if _cache:
1048 1048 if _cache[0] == node:
1049 1049 return _cache[2]
1050 1050 cachedrev = _cache[1]
1051 1051
1052 1052 # look up what we need to read
1053 1053 text = None
1054 1054 if rev is None:
1055 1055 rev = self.rev(node)
1056 1056
1057 1057 # check rev flags
1058 1058 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1059 1059 raise RevlogError(_('incompatible revision flag %x') %
1060 1060 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1061 1061
1062 1062 # build delta chain
1063 1063 chain = []
1064 1064 index = self.index # for performance
1065 1065 generaldelta = self._generaldelta
1066 1066 iterrev = rev
1067 1067 e = index[iterrev]
1068 1068 while iterrev != e[3] and iterrev != cachedrev:
1069 1069 chain.append(iterrev)
1070 1070 if generaldelta:
1071 1071 iterrev = e[3]
1072 1072 else:
1073 1073 iterrev -= 1
1074 1074 e = index[iterrev]
1075 1075
1076 1076 if iterrev == cachedrev:
1077 1077 # cache hit
1078 1078 text = _cache[2]
1079 1079 else:
1080 1080 chain.append(iterrev)
1081 1081 chain.reverse()
1082 1082
1083 1083 # drop cache to save memory
1084 1084 self._cache = None
1085 1085
1086 1086 bins = self._chunks(chain)
1087 1087 if text is None:
1088 1088 text = str(bins[0])
1089 1089 bins = bins[1:]
1090 1090
1091 1091 text = mdiff.patches(text, bins)
1092 1092
1093 1093 text = self._checkhash(text, node, rev)
1094 1094
1095 1095 self._cache = (node, rev, text)
1096 1096 return text
1097 1097
1098 1098 def hash(self, text, p1, p2):
1099 1099 """Compute a node hash.
1100 1100
1101 1101 Available as a function so that subclasses can replace the hash
1102 1102 as needed.
1103 1103 """
1104 1104 return hash(text, p1, p2)
1105 1105
1106 1106 def _checkhash(self, text, node, rev):
1107 1107 p1, p2 = self.parents(node)
1108 1108 self.checkhash(text, p1, p2, node, rev)
1109 1109 return text
1110 1110
1111 1111 def checkhash(self, text, p1, p2, node, rev=None):
1112 1112 if node != self.hash(text, p1, p2):
1113 1113 revornode = rev
1114 1114 if revornode is None:
1115 1115 revornode = templatefilters.short(hex(node))
1116 1116 raise RevlogError(_("integrity check failed on %s:%s")
1117 1117 % (self.indexfile, revornode))
1118 1118
1119 1119 def checkinlinesize(self, tr, fp=None):
1120 1120 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1121 1121 return
1122 1122
1123 1123 trinfo = tr.find(self.indexfile)
1124 1124 if trinfo is None:
1125 1125 raise RevlogError(_("%s not found in the transaction")
1126 1126 % self.indexfile)
1127 1127
1128 1128 trindex = trinfo[2]
1129 1129 dataoff = self.start(trindex)
1130 1130
1131 1131 tr.add(self.datafile, dataoff)
1132 1132
1133 1133 if fp:
1134 1134 fp.flush()
1135 1135 fp.close()
1136 1136
1137 1137 df = self.opener(self.datafile, 'w')
1138 1138 try:
1139 1139 for r in self:
1140 1140 df.write(self._chunkraw(r, r))
1141 1141 finally:
1142 1142 df.close()
1143 1143
1144 1144 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1145 1145 self.version &= ~(REVLOGNGINLINEDATA)
1146 1146 self._inline = False
1147 1147 for i in self:
1148 1148 e = self._io.packentry(self.index[i], self.node, self.version, i)
1149 1149 fp.write(e)
1150 1150
1151 1151 # if we don't call close, the temp file will never replace the
1152 1152 # real index
1153 1153 fp.close()
1154 1154
1155 1155 tr.replace(self.indexfile, trindex * self._io.size)
1156 1156 self._chunkclear()
1157 1157
1158 1158 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1159 1159 node=None):
1160 1160 """add a revision to the log
1161 1161
1162 1162 text - the revision data to add
1163 1163 transaction - the transaction object used for rollback
1164 1164 link - the linkrev data to add
1165 1165 p1, p2 - the parent nodeids of the revision
1166 1166 cachedelta - an optional precomputed delta
1167 1167 node - nodeid of revision; typically node is not specified, and it is
1168 1168 computed by default as hash(text, p1, p2), however subclasses might
1169 1169 use a different hashing method (and override checkhash() in that case)
1170 1170 """
1171 1171 if link == nullrev:
1172 1172 raise RevlogError(_("attempted to add linkrev -1 to %s")
1173 1173 % self.indexfile)
1174 1174 node = node or self.hash(text, p1, p2)
1175 1175 if node in self.nodemap:
1176 1176 return node
1177 1177
1178 1178 dfh = None
1179 1179 if not self._inline:
1180 1180 dfh = self.opener(self.datafile, "a")
1181 1181 ifh = self.opener(self.indexfile, "a+")
1182 1182 try:
1183 1183 return self._addrevision(node, text, transaction, link, p1, p2,
1184 1184 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1185 1185 finally:
1186 1186 if dfh:
1187 1187 dfh.close()
1188 1188 ifh.close()
1189 1189
1190 1190 def compress(self, text):
1191 1191 """ generate a possibly-compressed representation of text """
1192 1192 if not text:
1193 1193 return ("", text)
1194 1194 l = len(text)
1195 1195 bin = None
1196 1196 if l < 44:
1197 1197 pass
1198 1198 elif l > 1000000:
1199 1199 # zlib makes an internal copy, thus doubling memory usage for
1200 1200 # large files, so let's do this in pieces
1201 1201 z = zlib.compressobj()
1202 1202 p = []
1203 1203 pos = 0
1204 1204 while pos < l:
1205 1205 pos2 = pos + 2**20
1206 1206 p.append(z.compress(text[pos:pos2]))
1207 1207 pos = pos2
1208 1208 p.append(z.flush())
1209 1209 if sum(map(len, p)) < l:
1210 1210 bin = "".join(p)
1211 1211 else:
1212 1212 bin = _compress(text)
1213 1213 if bin is None or len(bin) > l:
1214 1214 if text[0] == '\0':
1215 1215 return ("", text)
1216 1216 return ('u', text)
1217 1217 return ("", bin)
1218 1218
1219 1219 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1220 1220 cachedelta, ifh, dfh):
1221 1221 """internal function to add revisions to the log
1222 1222
1223 1223 see addrevision for argument descriptions.
1224 1224 invariants:
1225 1225 - text is optional (can be None); if not set, cachedelta must be set.
1226 1226 if both are set, they must correspond to each other.
1227 1227 """
1228 1228 btext = [text]
1229 1229 def buildtext():
1230 1230 if btext[0] is not None:
1231 1231 return btext[0]
1232 1232 # flush any pending writes here so we can read it in revision
1233 1233 if dfh:
1234 1234 dfh.flush()
1235 1235 ifh.flush()
1236 1236 baserev = cachedelta[0]
1237 1237 delta = cachedelta[1]
1238 1238 # special case deltas which replace entire base; no need to decode
1239 1239 # base revision. this neatly avoids censored bases, which throw when
1240 1240 # they're decoded.
1241 1241 hlen = struct.calcsize(">lll")
1242 1242 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1243 1243 len(delta) - hlen):
1244 1244 btext[0] = delta[hlen:]
1245 1245 else:
1246 1246 basetext = self.revision(self.node(baserev))
1247 1247 btext[0] = mdiff.patch(basetext, delta)
1248 1248 try:
1249 1249 self.checkhash(btext[0], p1, p2, node)
1250 1250 if flags & REVIDX_ISCENSORED:
1251 1251 raise RevlogError(_('node %s is not censored') % node)
1252 1252 except CensoredNodeError:
1253 1253 # must pass the censored index flag to add censored revisions
1254 1254 if not flags & REVIDX_ISCENSORED:
1255 1255 raise
1256 1256 return btext[0]
1257 1257
1258 1258 def builddelta(rev):
1259 1259 # can we use the cached delta?
1260 1260 if cachedelta and cachedelta[0] == rev:
1261 1261 delta = cachedelta[1]
1262 1262 else:
1263 1263 t = buildtext()
1264 1264 if self.iscensored(rev):
1265 1265 # deltas based on a censored revision must replace the
1266 1266 # full content in one patch, so delta works everywhere
1267 1267 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1268 1268 delta = header + t
1269 1269 else:
1270 1270 ptext = self.revision(self.node(rev))
1271 1271 delta = mdiff.textdiff(ptext, t)
1272 1272 data = self.compress(delta)
1273 1273 l = len(data[1]) + len(data[0])
1274 1274 if basecache[0] == rev:
1275 1275 chainbase = basecache[1]
1276 1276 else:
1277 1277 chainbase = self.chainbase(rev)
1278 1278 dist = l + offset - self.start(chainbase)
1279 1279 if self._generaldelta:
1280 1280 base = rev
1281 1281 else:
1282 1282 base = chainbase
1283 1283 chainlen, compresseddeltalen = self._chaininfo(rev)
1284 1284 chainlen += 1
1285 1285 compresseddeltalen += l
1286 1286 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1287 1287
1288 1288 curr = len(self)
1289 1289 prev = curr - 1
1290 1290 base = chainbase = curr
1291 1291 chainlen = None
1292 1292 offset = self.end(prev)
1293 1293 d = None
1294 1294 if self._basecache is None:
1295 1295 self._basecache = (prev, self.chainbase(prev))
1296 1296 basecache = self._basecache
1297 1297 p1r, p2r = self.rev(p1), self.rev(p2)
1298 1298
1299 1299 # should we try to build a delta?
1300 1300 if prev != nullrev:
1301 1301 if self._generaldelta:
1302 1302 if p1r >= basecache[1]:
1303 1303 d = builddelta(p1r)
1304 1304 elif p2r >= basecache[1]:
1305 1305 d = builddelta(p2r)
1306 1306 else:
1307 1307 d = builddelta(prev)
1308 1308 else:
1309 1309 d = builddelta(prev)
1310 1310 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1311 1311
1312 1312 # full versions are inserted when the needed deltas
1313 1313 # become comparable to the uncompressed text
1314 1314 if text is None:
1315 1315 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1316 1316 cachedelta[1])
1317 1317 else:
1318 1318 textlen = len(text)
1319 1319
1320 1320 # - 'dist' is the distance from the base revision -- bounding it limits
1321 1321 # the amount of I/O we need to do.
1322 1322 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1323 1323 # to apply -- bounding it limits the amount of CPU we consume.
1324 1324 if (d is None or dist > textlen * 4 or l > textlen or
1325 1325 compresseddeltalen > textlen * 2 or
1326 1326 (self._maxchainlen and chainlen > self._maxchainlen)):
1327 1327 text = buildtext()
1328 1328 data = self.compress(text)
1329 1329 l = len(data[1]) + len(data[0])
1330 1330 base = chainbase = curr
1331 1331
1332 1332 e = (offset_type(offset, flags), l, textlen,
1333 1333 base, link, p1r, p2r, node)
1334 1334 self.index.insert(-1, e)
1335 1335 self.nodemap[node] = curr
1336 1336
1337 1337 entry = self._io.packentry(e, self.node, self.version, curr)
1338 1338 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1339 1339
1340 1340 if type(text) == str: # only accept immutable objects
1341 1341 self._cache = (node, curr, text)
1342 1342 self._basecache = (curr, chainbase)
1343 1343 return node
1344 1344
1345 1345 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1346 1346 curr = len(self) - 1
1347 1347 if not self._inline:
1348 1348 transaction.add(self.datafile, offset)
1349 1349 transaction.add(self.indexfile, curr * len(entry))
1350 1350 if data[0]:
1351 1351 dfh.write(data[0])
1352 1352 dfh.write(data[1])
1353 1353 dfh.flush()
1354 1354 ifh.write(entry)
1355 1355 else:
1356 1356 offset += curr * self._io.size
1357 1357 transaction.add(self.indexfile, offset, curr)
1358 1358 ifh.write(entry)
1359 1359 ifh.write(data[0])
1360 1360 ifh.write(data[1])
1361 1361 self.checkinlinesize(transaction, ifh)
1362 1362
1363 1363 def addgroup(self, bundle, linkmapper, transaction):
1364 1364 """
1365 1365 add a delta group
1366 1366
1367 1367 given a set of deltas, add them to the revision log. the
1368 1368 first delta is against its parent, which should be in our
1369 1369 log, the rest are against the previous delta.
1370 1370 """
1371 1371
1372 1372 # track the base of the current delta log
1373 1373 content = []
1374 1374 node = None
1375 1375
1376 1376 r = len(self)
1377 1377 end = 0
1378 1378 if r:
1379 1379 end = self.end(r - 1)
1380 1380 ifh = self.opener(self.indexfile, "a+")
1381 1381 isize = r * self._io.size
1382 1382 if self._inline:
1383 1383 transaction.add(self.indexfile, end + isize, r)
1384 1384 dfh = None
1385 1385 else:
1386 1386 transaction.add(self.indexfile, isize, r)
1387 1387 transaction.add(self.datafile, end)
1388 1388 dfh = self.opener(self.datafile, "a")
1389
1389 def flush():
1390 if dfh:
1391 dfh.flush()
1392 ifh.flush()
1390 1393 try:
1391 1394 # loop through our set of deltas
1392 1395 chain = None
1393 1396 while True:
1394 1397 chunkdata = bundle.deltachunk(chain)
1395 1398 if not chunkdata:
1396 1399 break
1397 1400 node = chunkdata['node']
1398 1401 p1 = chunkdata['p1']
1399 1402 p2 = chunkdata['p2']
1400 1403 cs = chunkdata['cs']
1401 1404 deltabase = chunkdata['deltabase']
1402 1405 delta = chunkdata['delta']
1403 1406
1404 1407 content.append(node)
1405 1408
1406 1409 link = linkmapper(cs)
1407 1410 if node in self.nodemap:
1408 1411 # this can happen if two branches make the same change
1409 1412 chain = node
1410 1413 continue
1411 1414
1412 1415 for p in (p1, p2):
1413 1416 if p not in self.nodemap:
1414 1417 raise LookupError(p, self.indexfile,
1415 1418 _('unknown parent'))
1416 1419
1417 1420 if deltabase not in self.nodemap:
1418 1421 raise LookupError(deltabase, self.indexfile,
1419 1422 _('unknown delta base'))
1420 1423
1421 1424 baserev = self.rev(deltabase)
1422 1425
1423 1426 if baserev != nullrev and self.iscensored(baserev):
1424 1427 # if base is censored, delta must be full replacement in a
1425 1428 # single patch operation
1426 1429 hlen = struct.calcsize(">lll")
1427 1430 oldlen = self.rawsize(baserev)
1428 1431 newlen = len(delta) - hlen
1429 1432 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1430 1433 raise error.CensoredBaseError(self.indexfile,
1431 1434 self.node(baserev))
1432 1435
1436 flags = REVIDX_DEFAULT_FLAGS
1437 if self._peek_iscensored(baserev, delta, flush):
1438 flags |= REVIDX_ISCENSORED
1439
1433 1440 chain = self._addrevision(node, None, transaction, link,
1434 p1, p2, REVIDX_DEFAULT_FLAGS,
1435 (baserev, delta), ifh, dfh)
1441 p1, p2, flags, (baserev, delta),
1442 ifh, dfh)
1436 1443 if not dfh and not self._inline:
1437 1444 # addrevision switched from inline to conventional
1438 1445 # reopen the index
1439 1446 ifh.close()
1440 1447 dfh = self.opener(self.datafile, "a")
1441 1448 ifh = self.opener(self.indexfile, "a")
1442 1449 finally:
1443 1450 if dfh:
1444 1451 dfh.close()
1445 1452 ifh.close()
1446 1453
1447 1454 return content
1448 1455
1449 1456 def iscensored(self, rev):
1450 1457 """Check if a file revision is censored."""
1451 1458 return False
1452 1459
1460 def _peek_iscensored(self, baserev, delta, flush):
1461 """Quickly check if a delta produces a censored revision."""
1462 return False
1463
1453 1464 def getstrippoint(self, minlink):
1454 1465 """find the minimum rev that must be stripped to strip the linkrev
1455 1466
1456 1467 Returns a tuple containing the minimum rev and a set of all revs that
1457 1468 have linkrevs that will be broken by this strip.
1458 1469 """
1459 1470 brokenrevs = set()
1460 1471 strippoint = len(self)
1461 1472
1462 1473 heads = {}
1463 1474 futurelargelinkrevs = set()
1464 1475 for head in self.headrevs():
1465 1476 headlinkrev = self.linkrev(head)
1466 1477 heads[head] = headlinkrev
1467 1478 if headlinkrev >= minlink:
1468 1479 futurelargelinkrevs.add(headlinkrev)
1469 1480
1470 1481 # This algorithm involves walking down the rev graph, starting at the
1471 1482 # heads. Since the revs are topologically sorted according to linkrev,
1472 1483 # once all head linkrevs are below the minlink, we know there are
1473 1484 # no more revs that could have a linkrev greater than minlink.
1474 1485 # So we can stop walking.
1475 1486 while futurelargelinkrevs:
1476 1487 strippoint -= 1
1477 1488 linkrev = heads.pop(strippoint)
1478 1489
1479 1490 if linkrev < minlink:
1480 1491 brokenrevs.add(strippoint)
1481 1492 else:
1482 1493 futurelargelinkrevs.remove(linkrev)
1483 1494
1484 1495 for p in self.parentrevs(strippoint):
1485 1496 if p != nullrev:
1486 1497 plinkrev = self.linkrev(p)
1487 1498 heads[p] = plinkrev
1488 1499 if plinkrev >= minlink:
1489 1500 futurelargelinkrevs.add(plinkrev)
1490 1501
1491 1502 return strippoint, brokenrevs
1492 1503
1493 1504 def strip(self, minlink, transaction):
1494 1505 """truncate the revlog on the first revision with a linkrev >= minlink
1495 1506
1496 1507 This function is called when we're stripping revision minlink and
1497 1508 its descendants from the repository.
1498 1509
1499 1510 We have to remove all revisions with linkrev >= minlink, because
1500 1511 the equivalent changelog revisions will be renumbered after the
1501 1512 strip.
1502 1513
1503 1514 So we truncate the revlog on the first of these revisions, and
1504 1515 trust that the caller has saved the revisions that shouldn't be
1505 1516 removed and that it'll re-add them after this truncation.
1506 1517 """
1507 1518 if len(self) == 0:
1508 1519 return
1509 1520
1510 1521 rev, _ = self.getstrippoint(minlink)
1511 1522 if rev == len(self):
1512 1523 return
1513 1524
1514 1525 # first truncate the files on disk
1515 1526 end = self.start(rev)
1516 1527 if not self._inline:
1517 1528 transaction.add(self.datafile, end)
1518 1529 end = rev * self._io.size
1519 1530 else:
1520 1531 end += rev * self._io.size
1521 1532
1522 1533 transaction.add(self.indexfile, end)
1523 1534
1524 1535 # then reset internal state in memory to forget those revisions
1525 1536 self._cache = None
1526 1537 self._chaininfocache = {}
1527 1538 self._chunkclear()
1528 1539 for x in xrange(rev, len(self)):
1529 1540 del self.nodemap[self.node(x)]
1530 1541
1531 1542 del self.index[rev:-1]
1532 1543
1533 1544 def checksize(self):
1534 1545 expected = 0
1535 1546 if len(self):
1536 1547 expected = max(0, self.end(len(self) - 1))
1537 1548
1538 1549 try:
1539 1550 f = self.opener(self.datafile)
1540 1551 f.seek(0, 2)
1541 1552 actual = f.tell()
1542 1553 f.close()
1543 1554 dd = actual - expected
1544 1555 except IOError, inst:
1545 1556 if inst.errno != errno.ENOENT:
1546 1557 raise
1547 1558 dd = 0
1548 1559
1549 1560 try:
1550 1561 f = self.opener(self.indexfile)
1551 1562 f.seek(0, 2)
1552 1563 actual = f.tell()
1553 1564 f.close()
1554 1565 s = self._io.size
1555 1566 i = max(0, actual // s)
1556 1567 di = actual - (i * s)
1557 1568 if self._inline:
1558 1569 databytes = 0
1559 1570 for r in self:
1560 1571 databytes += max(0, self.length(r))
1561 1572 dd = 0
1562 1573 di = actual - len(self) * s - databytes
1563 1574 except IOError, inst:
1564 1575 if inst.errno != errno.ENOENT:
1565 1576 raise
1566 1577 di = 0
1567 1578
1568 1579 return (dd, di)
1569 1580
1570 1581 def files(self):
1571 1582 res = [self.indexfile]
1572 1583 if not self._inline:
1573 1584 res.append(self.datafile)
1574 1585 return res