revlog: add "iscensored()" to revlog public API...
Mike Edgar
r24118:76f6ae06 default
@@ -1,109 +1,109 @@ mercurial/filelog.py
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import error, revlog
9 9 import re
10 10
11 11 _mdre = re.compile('\1\n')
12 12 def parsemeta(text):
13 13 """return (metadatadict, keylist, metadatasize)"""
14 14 # text can be buffer, so we can't use .startswith or .index
15 15 if text[:2] != '\1\n':
16 16 return None, None
17 17 s = _mdre.search(text, 2).start()
18 18 mtext = text[2:s]
19 19 meta = {}
20 20 for l in mtext.splitlines():
21 21 k, v = l.split(": ", 1)
22 22 meta[k] = v
23 23 return meta, (s + 2)
24 24
25 25 def packmeta(meta, text):
26 26 keys = sorted(meta.iterkeys())
27 27 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
28 28 return "\1\n%s\1\n%s" % (metatext, text)
29 29
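The metadata envelope built by packmeta and parsed back by parsemeta frames the sorted key/value header between two '\1\n' markers, ahead of the file text. A minimal round-trip sketch (a standalone illustration, not part of the diff):

    text = packmeta({'copy': 'old.txt'}, 'contents\n')
    # text == '\x01\ncopy: old.txt\n\x01\ncontents\n'
    meta, size = parsemeta(text)
    # meta == {'copy': 'old.txt'} and text[size:] == 'contents\n'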
30 30 def _censoredtext(text):
31 31 m, offs = parsemeta(text)
32 32 return m and "censored" in m
33 33
34 34 class filelog(revlog.revlog):
35 35 def __init__(self, opener, path):
36 36 super(filelog, self).__init__(opener,
37 37 "/".join(("data", path + ".i")))
38 38
39 39 def read(self, node):
40 40 t = self.revision(node)
41 41 if not t.startswith('\1\n'):
42 42 return t
43 43 s = t.index('\1\n', 2)
44 44 return t[s + 2:]
45 45
46 46 def add(self, text, meta, transaction, link, p1=None, p2=None):
47 47 if meta or text.startswith('\1\n'):
48 48 text = packmeta(meta, text)
49 49 return self.addrevision(text, transaction, link, p1, p2)
50 50
51 51 def renamed(self, node):
52 52 if self.parents(node)[0] != revlog.nullid:
53 53 return False
54 54 t = self.revision(node)
55 55 m = parsemeta(t)[0]
56 56 if m and "copy" in m:
57 57 return (m["copy"], revlog.bin(m["copyrev"]))
58 58 return False
59 59
60 60 def size(self, rev):
61 61 """return the size of a given revision"""
62 62
63 63 # for revisions with renames, we have to go the slow way
64 64 node = self.node(rev)
65 65 if self.renamed(node):
66 66 return len(self.read(node))
67 if self._iscensored(rev):
67 if self.iscensored(rev):
68 68 return 0
69 69
70 70 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
71 71 return super(filelog, self).size(rev)
72 72
73 73 def cmp(self, node, text):
74 74 """compare text with a given file revision
75 75
76 76 returns True if text is different than what is stored.
77 77 """
78 78
79 79 t = text
80 80 if text.startswith('\1\n'):
81 81 t = '\1\n\1\n' + text
82 82
83 83 samehashes = not super(filelog, self).cmp(node, t)
84 84 if samehashes:
85 85 return False
86 86
87 87 # censored files compare against the empty file
88 if self._iscensored(self.rev(node)):
88 if self.iscensored(self.rev(node)):
89 89 return text != ''
90 90
91 91 # renaming a file produces a different hash, even if the data
92 92 # remains unchanged. Check if it's the case (slow):
93 93 if self.renamed(node):
94 94 t2 = self.read(node)
95 95 return t2 != text
96 96
97 97 return True
98 98
99 99 def checkhash(self, text, p1, p2, node, rev=None):
100 100 try:
101 101 super(filelog, self).checkhash(text, p1, p2, node, rev=rev)
102 102 except error.RevlogError:
103 103 if _censoredtext(text):
104 104 raise error.CensoredNodeError(self.indexfile, node)
105 105 raise
106 106
107 def _iscensored(self, rev):
107 def iscensored(self, rev):
108 108 """Check if a file revision is censored."""
109 109 return self.flags(rev) & revlog.REVIDX_ISCENSORED
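With the rename from _iscensored to iscensored, code outside filelog can query censorship through the public API. A hedged usage sketch ('repo' and the file path are assumptions for illustration):

    fl = repo.file('foo.txt')   # the filelog of a tracked file; 'repo' is assumed
    censored = [rev for rev in fl if fl.iscensored(rev)]
    # censored revisions report size() == 0 and cmp() equal to the empty file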
@@ -1,1543 +1,1547 @@ mercurial/revlog.py
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
38 38 REVIDX_DEFAULT_FLAGS = 0
39 39 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
40 40
41 41 # max size of revlog with inline data
42 42 _maxinline = 131072
43 43 _chunksize = 1048576
44 44
45 45 RevlogError = error.RevlogError
46 46 LookupError = error.LookupError
47 47 CensoredNodeError = error.CensoredNodeError
48 48
49 49 def getoffset(q):
50 50 return int(q >> 16)
51 51
52 52 def gettype(q):
53 53 return int(q & 0xFFFF)
54 54
55 55 def offset_type(offset, type):
56 56 return long(long(offset) << 16 | type)
57 57
58 58 _nullhash = _sha(nullid)
59 59
60 60 def hash(text, p1, p2):
61 61 """generate a hash from the given text and its parent hashes
62 62
63 63 This hash combines both the current file contents and its history
64 64 in a manner that makes it easy to distinguish nodes with the same
65 65 content in the revision graph.
66 66 """
67 67 # As of now, if one of the parent nodes is null, p2 is null
68 68 if p2 == nullid:
69 69 # deep copy of a hash is faster than creating one
70 70 s = _nullhash.copy()
71 71 s.update(p1)
72 72 else:
73 73 # none of the parent nodes are nullid
74 74 l = [p1, p2]
75 75 l.sort()
76 76 s = _sha(l[0])
77 77 s.update(l[1])
78 78 s.update(text)
79 79 return s.digest()
80 80
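Both branches compute the same digest as this plainer formulation, since nullid (twenty zero bytes) always sorts first; a reference sketch for clarity, not the code Mercurial runs:

    import hashlib
    def referencehash(text, p1, p2):
        a, b = sorted([p1, p2])
        return hashlib.sha1(a + b + text).digest()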
81 81 def decompress(bin):
82 82 """ decompress the given input """
83 83 if not bin:
84 84 return bin
85 85 t = bin[0]
86 86 if t == '\0':
87 87 return bin
88 88 if t == 'x':
89 89 try:
90 90 return _decompress(bin)
91 91 except zlib.error, e:
92 92 raise RevlogError(_("revlog decompress error: %s") % str(e))
93 93 if t == 'u':
94 94 return bin[1:]
95 95 raise RevlogError(_("unknown compression type %r") % t)
96 96
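decompress is the inverse of revlog.compress (defined further down): the first byte of a stored chunk selects the format ('x' begins a zlib stream, 'u' prefixes text stored verbatim, '\0' means plain text that happens to start with a NUL). A round-trip sketch, assuming rl is an open revlog:

    header, payload = rl.compress(sometext)
    chunk = header + payload   # '' + zlib stream, or 'u' + raw text
    assert decompress(chunk) == sometext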
97 97 # index v0:
98 98 # 4 bytes: offset
99 99 # 4 bytes: compressed length
100 100 # 4 bytes: base rev
101 101 # 4 bytes: link rev
102 102 # 32 bytes: parent 1 nodeid
103 103 # 32 bytes: parent 2 nodeid
104 104 # 32 bytes: nodeid
105 105 indexformatv0 = ">4l20s20s20s"
106 106 v0shaoffset = 56
107 107
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 ngshaoffset = 32
153 153 versionformat = ">I"
154 154
155 155 class revlogio(object):
156 156 def __init__(self):
157 157 self.size = struct.calcsize(indexformatng)
158 158
159 159 def parseindex(self, data, inline):
160 160 # call the C implementation to parse the index data
161 161 index, cache = parsers.parse_index2(data, inline)
162 162 return index, getattr(index, 'nodemap', None), cache
163 163
164 164 def packentry(self, entry, node, version, rev):
165 165 p = _pack(indexformatng, *entry)
166 166 if rev == 0:
167 167 p = _pack(versionformat, version) + p[4:]
168 168 return p
169 169
170 170 class revlog(object):
171 171 """
172 172 the underlying revision storage object
173 173
174 174 A revlog consists of two parts, an index and the revision data.
175 175
176 176 The index is a file with a fixed record size containing
177 177 information on each revision, including its nodeid (hash), the
178 178 nodeids of its parents, the position and offset of its data within
179 179 the data file, and the revision it's based on. Finally, each entry
180 180 contains a linkrev entry that can serve as a pointer to external
181 181 data.
182 182
183 183 The revision data itself is a linear collection of data chunks.
184 184 Each chunk represents a revision and is usually represented as a
185 185 delta against the previous chunk. To bound lookup time, runs of
186 186 deltas are limited to about 2 times the length of the original
187 187 version data. This makes retrieval of a version proportional to
188 188 its size, or O(1) relative to the number of revisions.
189 189
190 190 Both pieces of the revlog are written to in an append-only
191 191 fashion, which means we never need to rewrite a file to insert or
192 192 remove data, and can use some simple techniques to avoid the need
193 193 for locking while reading.
194 194 """
195 195 def __init__(self, opener, indexfile):
196 196 """
197 197 create a revlog object
198 198
199 199 opener is a function that abstracts the file opening operation
200 200 and can be used to implement COW semantics or the like.
201 201 """
202 202 self.indexfile = indexfile
203 203 self.datafile = indexfile[:-2] + ".d"
204 204 self.opener = opener
205 205 self._cache = None
206 206 self._basecache = None
207 207 self._chunkcache = (0, '')
208 208 self._chunkcachesize = 65536
209 209 self._maxchainlen = None
210 210 self.index = []
211 211 self._pcache = {}
212 212 self._nodecache = {nullid: nullrev}
213 213 self._nodepos = None
214 214
215 215 v = REVLOG_DEFAULT_VERSION
216 216 opts = getattr(opener, 'options', None)
217 217 if opts is not None:
218 218 if 'revlogv1' in opts:
219 219 if 'generaldelta' in opts:
220 220 v |= REVLOGGENERALDELTA
221 221 else:
222 222 v = 0
223 223 if 'chunkcachesize' in opts:
224 224 self._chunkcachesize = opts['chunkcachesize']
225 225 if 'maxchainlen' in opts:
226 226 self._maxchainlen = opts['maxchainlen']
227 227
228 228 if self._chunkcachesize <= 0:
229 229 raise RevlogError(_('revlog chunk cache size %r is not greater '
230 230 'than 0') % self._chunkcachesize)
231 231 elif self._chunkcachesize & (self._chunkcachesize - 1):
232 232 raise RevlogError(_('revlog chunk cache size %r is not a power '
233 233 'of 2') % self._chunkcachesize)
234 234
235 235 i = ''
236 236 self._initempty = True
237 237 try:
238 238 f = self.opener(self.indexfile)
239 239 i = f.read()
240 240 f.close()
241 241 if len(i) > 0:
242 242 v = struct.unpack(versionformat, i[:4])[0]
243 243 self._initempty = False
244 244 except IOError, inst:
245 245 if inst.errno != errno.ENOENT:
246 246 raise
247 247
248 248 self.version = v
249 249 self._inline = v & REVLOGNGINLINEDATA
250 250 self._generaldelta = v & REVLOGGENERALDELTA
251 251 flags = v & ~0xFFFF
252 252 fmt = v & 0xFFFF
253 253 if fmt == REVLOGV0 and flags:
254 254 raise RevlogError(_("index %s unknown flags %#04x for format v0")
255 255 % (self.indexfile, flags >> 16))
256 256 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
257 257 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
258 258 % (self.indexfile, flags >> 16))
259 259 elif fmt > REVLOGNG:
260 260 raise RevlogError(_("index %s unknown format %d")
261 261 % (self.indexfile, fmt))
262 262
263 263 self._io = revlogio()
264 264 if self.version == REVLOGV0:
265 265 self._io = revlogoldio()
266 266 try:
267 267 d = self._io.parseindex(i, self._inline)
268 268 except (ValueError, IndexError):
269 269 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
270 270 self.index, nodemap, self._chunkcache = d
271 271 if nodemap is not None:
272 272 self.nodemap = self._nodecache = nodemap
273 273 if not self._chunkcache:
274 274 self._chunkclear()
275 275 # revnum -> (chain-length, sum-delta-length)
276 276 self._chaininfocache = {}
277 277
278 278 def tip(self):
279 279 return self.node(len(self.index) - 2)
280 280 def __contains__(self, rev):
281 281 return 0 <= rev < len(self)
282 282 def __len__(self):
283 283 return len(self.index) - 1
284 284 def __iter__(self):
285 285 return iter(xrange(len(self)))
286 286 def revs(self, start=0, stop=None):
287 287 """iterate over all rev in this revlog (from start to stop)"""
288 288 step = 1
289 289 if stop is not None:
290 290 if start > stop:
291 291 step = -1
292 292 stop += step
293 293 else:
294 294 stop = len(self)
295 295 return xrange(start, stop, step)
296 296
297 297 @util.propertycache
298 298 def nodemap(self):
299 299 self.rev(self.node(0))
300 300 return self._nodecache
301 301
302 302 def hasnode(self, node):
303 303 try:
304 304 self.rev(node)
305 305 return True
306 306 except KeyError:
307 307 return False
308 308
309 309 def clearcaches(self):
310 310 try:
311 311 self._nodecache.clearcaches()
312 312 except AttributeError:
313 313 self._nodecache = {nullid: nullrev}
314 314 self._nodepos = None
315 315
316 316 def rev(self, node):
317 317 try:
318 318 return self._nodecache[node]
319 319 except TypeError:
320 320 raise
321 321 except RevlogError:
322 322 # parsers.c radix tree lookup failed
323 323 raise LookupError(node, self.indexfile, _('no node'))
324 324 except KeyError:
325 325 # pure python cache lookup failed
326 326 n = self._nodecache
327 327 i = self.index
328 328 p = self._nodepos
329 329 if p is None:
330 330 p = len(i) - 2
331 331 for r in xrange(p, -1, -1):
332 332 v = i[r][7]
333 333 n[v] = r
334 334 if v == node:
335 335 self._nodepos = r - 1
336 336 return r
337 337 raise LookupError(node, self.indexfile, _('no node'))
338 338
339 339 def node(self, rev):
340 340 return self.index[rev][7]
341 341 def linkrev(self, rev):
342 342 return self.index[rev][4]
343 343 def parents(self, node):
344 344 i = self.index
345 345 d = i[self.rev(node)]
346 346 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
347 347 def parentrevs(self, rev):
348 348 return self.index[rev][5:7]
349 349 def start(self, rev):
350 350 return int(self.index[rev][0] >> 16)
351 351 def end(self, rev):
352 352 return self.start(rev) + self.length(rev)
353 353 def length(self, rev):
354 354 return self.index[rev][1]
355 355 def chainbase(self, rev):
356 356 index = self.index
357 357 base = index[rev][3]
358 358 while base != rev:
359 359 rev = base
360 360 base = index[rev][3]
361 361 return base
362 362 def chainlen(self, rev):
363 363 return self._chaininfo(rev)[0]
364 364
365 365 def _chaininfo(self, rev):
366 366 chaininfocache = self._chaininfocache
367 367 if rev in chaininfocache:
368 368 return chaininfocache[rev]
369 369 index = self.index
370 370 generaldelta = self._generaldelta
371 371 iterrev = rev
372 372 e = index[iterrev]
373 373 clen = 0
374 374 compresseddeltalen = 0
375 375 while iterrev != e[3]:
376 376 clen += 1
377 377 compresseddeltalen += e[1]
378 378 if generaldelta:
379 379 iterrev = e[3]
380 380 else:
381 381 iterrev -= 1
382 382 if iterrev in chaininfocache:
383 383 t = chaininfocache[iterrev]
384 384 clen += t[0]
385 385 compresseddeltalen += t[1]
386 386 break
387 387 e = index[iterrev]
388 388 else:
389 389 # Add text length of base since decompressing that also takes
390 390 # work. For cache hits the length is already included.
391 391 compresseddeltalen += e[1]
392 392 r = (clen, compresseddeltalen)
393 393 chaininfocache[rev] = r
394 394 return r
395 395
396 396 def flags(self, rev):
397 397 return self.index[rev][0] & 0xFFFF
398 398 def rawsize(self, rev):
399 399 """return the length of the uncompressed text for a given revision"""
400 400 l = self.index[rev][2]
401 401 if l >= 0:
402 402 return l
403 403
404 404 t = self.revision(self.node(rev))
405 405 return len(t)
406 406 size = rawsize
407 407
408 408 def ancestors(self, revs, stoprev=0, inclusive=False):
409 409 """Generate the ancestors of 'revs' in reverse topological order.
410 410 Does not generate revs lower than stoprev.
411 411
412 412 See the documentation for ancestor.lazyancestors for more details."""
413 413
414 414 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
415 415 inclusive=inclusive)
416 416
417 417 def descendants(self, revs):
418 418 """Generate the descendants of 'revs' in revision order.
419 419
420 420 Yield a sequence of revision numbers starting with a child of
421 421 some rev in revs, i.e., each revision is *not* considered a
422 422 descendant of itself. Results are ordered by revision number (a
423 423 topological sort)."""
424 424 first = min(revs)
425 425 if first == nullrev:
426 426 for i in self:
427 427 yield i
428 428 return
429 429
430 430 seen = set(revs)
431 431 for i in self.revs(start=first + 1):
432 432 for x in self.parentrevs(i):
433 433 if x != nullrev and x in seen:
434 434 seen.add(i)
435 435 yield i
436 436 break
437 437
438 438 def findcommonmissing(self, common=None, heads=None):
439 439 """Return a tuple of the ancestors of common and the ancestors of heads
440 440 that are not ancestors of common. In revset terminology, we return the
441 441 tuple:
442 442
443 443 ::common, (::heads) - (::common)
444 444
445 445 The list is sorted by revision number, meaning it is
446 446 topologically sorted.
447 447
448 448 'heads' and 'common' are both lists of node IDs. If heads is
449 449 not supplied, uses all of the revlog's heads. If common is not
450 450 supplied, uses nullid."""
451 451 if common is None:
452 452 common = [nullid]
453 453 if heads is None:
454 454 heads = self.heads()
455 455
456 456 common = [self.rev(n) for n in common]
457 457 heads = [self.rev(n) for n in heads]
458 458
459 459 # we want the ancestors, but inclusive
460 460 class lazyset(object):
461 461 def __init__(self, lazyvalues):
462 462 self.addedvalues = set()
463 463 self.lazyvalues = lazyvalues
464 464
465 465 def __contains__(self, value):
466 466 return value in self.addedvalues or value in self.lazyvalues
467 467
468 468 def __iter__(self):
469 469 added = self.addedvalues
470 470 for r in added:
471 471 yield r
472 472 for r in self.lazyvalues:
473 473 if r not in added:
474 474 yield r
475 475
476 476 def add(self, value):
477 477 self.addedvalues.add(value)
478 478
479 479 def update(self, values):
480 480 self.addedvalues.update(values)
481 481
482 482 has = lazyset(self.ancestors(common))
483 483 has.add(nullrev)
484 484 has.update(common)
485 485
486 486 # take all ancestors from heads that aren't in has
487 487 missing = set()
488 488 visit = util.deque(r for r in heads if r not in has)
489 489 while visit:
490 490 r = visit.popleft()
491 491 if r in missing:
492 492 continue
493 493 else:
494 494 missing.add(r)
495 495 for p in self.parentrevs(r):
496 496 if p not in has:
497 497 visit.append(p)
498 498 missing = list(missing)
499 499 missing.sort()
500 500 return has, [self.node(r) for r in missing]
501 501
502 502 def incrementalmissingrevs(self, common=None):
503 503 """Return an object that can be used to incrementally compute the
504 504 revision numbers of the ancestors of arbitrary sets that are not
505 505 ancestors of common. This is an ancestor.incrementalmissingancestors
506 506 object.
507 507
508 508 'common' is a list of revision numbers. If common is not supplied, uses
509 509 nullrev.
510 510 """
511 511 if common is None:
512 512 common = [nullrev]
513 513
514 514 return ancestor.incrementalmissingancestors(self.parentrevs, common)
515 515
516 516 def findmissingrevs(self, common=None, heads=None):
517 517 """Return the revision numbers of the ancestors of heads that
518 518 are not ancestors of common.
519 519
520 520 More specifically, return a list of revision numbers corresponding to
521 521 nodes N such that every N satisfies the following constraints:
522 522
523 523 1. N is an ancestor of some node in 'heads'
524 524 2. N is not an ancestor of any node in 'common'
525 525
526 526 The list is sorted by revision number, meaning it is
527 527 topologically sorted.
528 528
529 529 'heads' and 'common' are both lists of revision numbers. If heads is
530 530 not supplied, uses all of the revlog's heads. If common is not
531 531 supplied, uses nullid."""
532 532 if common is None:
533 533 common = [nullrev]
534 534 if heads is None:
535 535 heads = self.headrevs()
536 536
537 537 inc = self.incrementalmissingrevs(common=common)
538 538 return inc.missingancestors(heads)
539 539
540 540 def findmissing(self, common=None, heads=None):
541 541 """Return the ancestors of heads that are not ancestors of common.
542 542
543 543 More specifically, return a list of nodes N such that every N
544 544 satisfies the following constraints:
545 545
546 546 1. N is an ancestor of some node in 'heads'
547 547 2. N is not an ancestor of any node in 'common'
548 548
549 549 The list is sorted by revision number, meaning it is
550 550 topologically sorted.
551 551
552 552 'heads' and 'common' are both lists of node IDs. If heads is
553 553 not supplied, uses all of the revlog's heads. If common is not
554 554 supplied, uses nullid."""
555 555 if common is None:
556 556 common = [nullid]
557 557 if heads is None:
558 558 heads = self.heads()
559 559
560 560 common = [self.rev(n) for n in common]
561 561 heads = [self.rev(n) for n in heads]
562 562
563 563 inc = self.incrementalmissingrevs(common=common)
564 564 return [self.node(r) for r in inc.missingancestors(heads)]
565 565
566 566 def nodesbetween(self, roots=None, heads=None):
567 567 """Return a topological path from 'roots' to 'heads'.
568 568
569 569 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
570 570 topologically sorted list of all nodes N that satisfy both of
571 571 these constraints:
572 572
573 573 1. N is a descendant of some node in 'roots'
574 574 2. N is an ancestor of some node in 'heads'
575 575
576 576 Every node is considered to be both a descendant and an ancestor
577 577 of itself, so every reachable node in 'roots' and 'heads' will be
578 578 included in 'nodes'.
579 579
580 580 'outroots' is the list of reachable nodes in 'roots', i.e., the
581 581 subset of 'roots' that is returned in 'nodes'. Likewise,
582 582 'outheads' is the subset of 'heads' that is also in 'nodes'.
583 583
584 584 'roots' and 'heads' are both lists of node IDs. If 'roots' is
585 585 unspecified, uses nullid as the only root. If 'heads' is
586 586 unspecified, uses list of all of the revlog's heads."""
587 587 nonodes = ([], [], [])
588 588 if roots is not None:
589 589 roots = list(roots)
590 590 if not roots:
591 591 return nonodes
592 592 lowestrev = min([self.rev(n) for n in roots])
593 593 else:
594 594 roots = [nullid] # Everybody's a descendant of nullid
595 595 lowestrev = nullrev
596 596 if (lowestrev == nullrev) and (heads is None):
597 597 # We want _all_ the nodes!
598 598 return ([self.node(r) for r in self], [nullid], list(self.heads()))
599 599 if heads is None:
600 600 # All nodes are ancestors, so the latest ancestor is the last
601 601 # node.
602 602 highestrev = len(self) - 1
603 603 # Set ancestors to None to signal that every node is an ancestor.
604 604 ancestors = None
605 605 # Set heads to an empty dictionary for later discovery of heads
606 606 heads = {}
607 607 else:
608 608 heads = list(heads)
609 609 if not heads:
610 610 return nonodes
611 611 ancestors = set()
612 612 # Turn heads into a dictionary so we can remove 'fake' heads.
613 613 # Also, later we will be using it to filter out the heads we can't
614 614 # find from roots.
615 615 heads = dict.fromkeys(heads, False)
616 616 # Start at the top and keep marking parents until we're done.
617 617 nodestotag = set(heads)
618 618 # Remember where the top was so we can use it as a limit later.
619 619 highestrev = max([self.rev(n) for n in nodestotag])
620 620 while nodestotag:
621 621 # grab a node to tag
622 622 n = nodestotag.pop()
623 623 # Never tag nullid
624 624 if n == nullid:
625 625 continue
626 626 # A node's revision number represents its place in a
627 627 # topologically sorted list of nodes.
628 628 r = self.rev(n)
629 629 if r >= lowestrev:
630 630 if n not in ancestors:
631 631 # If we are possibly a descendant of one of the roots
632 632 # and we haven't already been marked as an ancestor
633 633 ancestors.add(n) # Mark as ancestor
634 634 # Add non-nullid parents to list of nodes to tag.
635 635 nodestotag.update([p for p in self.parents(n) if
636 636 p != nullid])
637 637 elif n in heads: # We've seen it before, is it a fake head?
638 638 # So it is, real heads should not be the ancestors of
639 639 # any other heads.
640 640 heads.pop(n)
641 641 if not ancestors:
642 642 return nonodes
643 643 # Now that we have our set of ancestors, we want to remove any
644 644 # roots that are not ancestors.
645 645
646 646 # If one of the roots was nullid, everything is included anyway.
647 647 if lowestrev > nullrev:
648 648 # But, since we weren't, let's recompute the lowest rev to not
649 649 # include roots that aren't ancestors.
650 650
651 651 # Filter out roots that aren't ancestors of heads
652 652 roots = [n for n in roots if n in ancestors]
653 653 # Recompute the lowest revision
654 654 if roots:
655 655 lowestrev = min([self.rev(n) for n in roots])
656 656 else:
657 657 # No more roots? Return empty list
658 658 return nonodes
659 659 else:
660 660 # We are descending from nullid, and don't need to care about
661 661 # any other roots.
662 662 lowestrev = nullrev
663 663 roots = [nullid]
664 664 # Transform our roots list into a set.
665 665 descendants = set(roots)
666 666 # Also, keep the original roots so we can filter out roots that aren't
667 667 # 'real' roots (i.e. are descended from other roots).
668 668 roots = descendants.copy()
669 669 # Our topologically sorted list of output nodes.
670 670 orderedout = []
671 671 # Don't start at nullid since we don't want nullid in our output list,
672 672 # and if nullid shows up in descendants, empty parents will look like
673 673 # they're descendants.
674 674 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
675 675 n = self.node(r)
676 676 isdescendant = False
677 677 if lowestrev == nullrev: # Everybody is a descendant of nullid
678 678 isdescendant = True
679 679 elif n in descendants:
680 680 # n is already a descendant
681 681 isdescendant = True
682 682 # This check only needs to be done here because all the roots
683 683 # will start being marked as descendants before the loop.
684 684 if n in roots:
685 685 # If n was a root, check if it's a 'real' root.
686 686 p = tuple(self.parents(n))
687 687 # If any of its parents are descendants, it's not a root.
688 688 if (p[0] in descendants) or (p[1] in descendants):
689 689 roots.remove(n)
690 690 else:
691 691 p = tuple(self.parents(n))
692 692 # A node is a descendant if either of its parents are
693 693 # descendants. (We seeded the descendants set with the roots
694 694 # up there, remember?)
695 695 if (p[0] in descendants) or (p[1] in descendants):
696 696 descendants.add(n)
697 697 isdescendant = True
698 698 if isdescendant and ((ancestors is None) or (n in ancestors)):
699 699 # Only include nodes that are both descendants and ancestors.
700 700 orderedout.append(n)
701 701 if (ancestors is not None) and (n in heads):
702 702 # We're trying to figure out which heads are reachable
703 703 # from roots.
704 704 # Mark this head as having been reached
705 705 heads[n] = True
706 706 elif ancestors is None:
707 707 # Otherwise, we're trying to discover the heads.
708 708 # Assume this is a head because if it isn't, the next step
709 709 # will eventually remove it.
710 710 heads[n] = True
711 711 # But, obviously its parents aren't.
712 712 for p in self.parents(n):
713 713 heads.pop(p, None)
714 714 heads = [n for n, flag in heads.iteritems() if flag]
715 715 roots = list(roots)
716 716 assert orderedout
717 717 assert roots
718 718 assert heads
719 719 return (orderedout, roots, heads)
720 720
721 721 def headrevs(self):
722 722 try:
723 723 return self.index.headrevs()
724 724 except AttributeError:
725 725 return self._headrevs()
726 726
727 727 def _headrevs(self):
728 728 count = len(self)
729 729 if not count:
730 730 return [nullrev]
731 731 # we won't iterate over filtered revs, so nobody is a head at start
732 732 ishead = [0] * (count + 1)
733 733 index = self.index
734 734 for r in self:
735 735 ishead[r] = 1 # I may be a head
736 736 e = index[r]
737 737 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
738 738 return [r for r, val in enumerate(ishead) if val]
739 739
740 740 def heads(self, start=None, stop=None):
741 741 """return the list of all nodes that have no children
742 742
743 743 if start is specified, only heads that are descendants of
744 744 start will be returned
745 745 if stop is specified, it will consider all the revs from stop
746 746 as if they had no children
747 747 """
748 748 if start is None and stop is None:
749 749 if not len(self):
750 750 return [nullid]
751 751 return [self.node(r) for r in self.headrevs()]
752 752
753 753 if start is None:
754 754 start = nullid
755 755 if stop is None:
756 756 stop = []
757 757 stoprevs = set([self.rev(n) for n in stop])
758 758 startrev = self.rev(start)
759 759 reachable = set((startrev,))
760 760 heads = set((startrev,))
761 761
762 762 parentrevs = self.parentrevs
763 763 for r in self.revs(start=startrev + 1):
764 764 for p in parentrevs(r):
765 765 if p in reachable:
766 766 if r not in stoprevs:
767 767 reachable.add(r)
768 768 heads.add(r)
769 769 if p in heads and p not in stoprevs:
770 770 heads.remove(p)
771 771
772 772 return [self.node(r) for r in heads]
773 773
774 774 def children(self, node):
775 775 """find the children of a given node"""
776 776 c = []
777 777 p = self.rev(node)
778 778 for r in self.revs(start=p + 1):
779 779 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
780 780 if prevs:
781 781 for pr in prevs:
782 782 if pr == p:
783 783 c.append(self.node(r))
784 784 elif p == nullrev:
785 785 c.append(self.node(r))
786 786 return c
787 787
788 788 def descendant(self, start, end):
789 789 if start == nullrev:
790 790 return True
791 791 for i in self.descendants([start]):
792 792 if i == end:
793 793 return True
794 794 elif i > end:
795 795 break
796 796 return False
797 797
798 798 def commonancestorsheads(self, a, b):
799 799 """calculate all the heads of the common ancestors of nodes a and b"""
800 800 a, b = self.rev(a), self.rev(b)
801 801 try:
802 802 ancs = self.index.commonancestorsheads(a, b)
803 803 except (AttributeError, OverflowError): # C implementation failed
804 804 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
805 805 return map(self.node, ancs)
806 806
807 807 def isancestor(self, a, b):
808 808 """return True if node a is an ancestor of node b
809 809
810 810 The implementation of this is trivial but the use of
811 811 commonancestorsheads is not."""
812 812 return a in self.commonancestorsheads(a, b)
813 813
814 814 def ancestor(self, a, b):
815 815 """calculate the "best" common ancestor of nodes a and b"""
816 816
817 817 a, b = self.rev(a), self.rev(b)
818 818 try:
819 819 ancs = self.index.ancestors(a, b)
820 820 except (AttributeError, OverflowError):
821 821 ancs = ancestor.ancestors(self.parentrevs, a, b)
822 822 if ancs:
823 823 # choose a consistent winner when there's a tie
824 824 return min(map(self.node, ancs))
825 825 return nullid
826 826
827 827 def _match(self, id):
828 828 if isinstance(id, int):
829 829 # rev
830 830 return self.node(id)
831 831 if len(id) == 20:
832 832 # possibly a binary node
833 833 # odds of a binary node being all hex in ASCII are 1 in 10**25
834 834 try:
835 835 node = id
836 836 self.rev(node) # quick search the index
837 837 return node
838 838 except LookupError:
839 839 pass # may be partial hex id
840 840 try:
841 841 # str(rev)
842 842 rev = int(id)
843 843 if str(rev) != id:
844 844 raise ValueError
845 845 if rev < 0:
846 846 rev = len(self) + rev
847 847 if rev < 0 or rev >= len(self):
848 848 raise ValueError
849 849 return self.node(rev)
850 850 except (ValueError, OverflowError):
851 851 pass
852 852 if len(id) == 40:
853 853 try:
854 854 # a full hex nodeid?
855 855 node = bin(id)
856 856 self.rev(node)
857 857 return node
858 858 except (TypeError, LookupError):
859 859 pass
860 860
861 861 def _partialmatch(self, id):
862 862 try:
863 863 n = self.index.partialmatch(id)
864 864 if n and self.hasnode(n):
865 865 return n
866 866 return None
867 867 except RevlogError:
868 868 # parsers.c radix tree lookup gave multiple matches
869 869 # fall through to slow path that filters hidden revisions
870 870 pass
871 871 except (AttributeError, ValueError):
872 872 # we are pure python, or key was too short to search radix tree
873 873 pass
874 874
875 875 if id in self._pcache:
876 876 return self._pcache[id]
877 877
878 878 if len(id) < 40:
879 879 try:
880 880 # hex(node)[:...]
881 881 l = len(id) // 2 # grab an even number of digits
882 882 prefix = bin(id[:l * 2])
883 883 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
884 884 nl = [n for n in nl if hex(n).startswith(id) and
885 885 self.hasnode(n)]
886 886 if len(nl) > 0:
887 887 if len(nl) == 1:
888 888 self._pcache[id] = nl[0]
889 889 return nl[0]
890 890 raise LookupError(id, self.indexfile,
891 891 _('ambiguous identifier'))
892 892 return None
893 893 except TypeError:
894 894 pass
895 895
896 896 def lookup(self, id):
897 897 """locate a node based on:
898 898 - revision number or str(revision number)
899 899 - nodeid or subset of hex nodeid
900 900 """
901 901 n = self._match(id)
902 902 if n is not None:
903 903 return n
904 904 n = self._partialmatch(id)
905 905 if n:
906 906 return n
907 907
908 908 raise LookupError(id, self.indexfile, _('no match found'))
909 909
910 910 def cmp(self, node, text):
911 911 """compare text with a given file revision
912 912
913 913 returns True if text is different than what is stored.
914 914 """
915 915 p1, p2 = self.parents(node)
916 916 return hash(text, p1, p2) != node
917 917
918 918 def _addchunk(self, offset, data):
919 919 o, d = self._chunkcache
920 920 # try to add to existing cache
921 921 if o + len(d) == offset and len(d) + len(data) < _chunksize:
922 922 self._chunkcache = o, d + data
923 923 else:
924 924 self._chunkcache = offset, data
925 925
926 926 def _loadchunk(self, offset, length):
927 927 if self._inline:
928 928 df = self.opener(self.indexfile)
929 929 else:
930 930 df = self.opener(self.datafile)
931 931
932 932 # Cache data both forward and backward around the requested
933 933 # data, in a fixed size window. This helps speed up operations
934 934 # involving reading the revlog backwards.
935 935 cachesize = self._chunkcachesize
936 936 realoffset = offset & ~(cachesize - 1)
937 937 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
938 938 - realoffset)
939 939 df.seek(realoffset)
940 940 d = df.read(reallength)
941 941 df.close()
942 942 self._addchunk(realoffset, d)
943 943 if offset != realoffset or reallength != length:
944 944 return util.buffer(d, offset - realoffset, length)
945 945 return d
946 946
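With the default 64 KiB cache, the masking above aligns reads to cache-size boundaries. For example, a request for 100 bytes at offset 70,000 yields realoffset = 70000 & ~65535 = 65536 and reallength = ((70100 + 65536) & ~65535) - 65536 = 65536, so one aligned 64 KiB window is read and the 100 requested bytes are returned as a buffer sliced from it.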
947 947 def _getchunk(self, offset, length):
948 948 o, d = self._chunkcache
949 949 l = len(d)
950 950
951 951 # is it in the cache?
952 952 cachestart = offset - o
953 953 cacheend = cachestart + length
954 954 if cachestart >= 0 and cacheend <= l:
955 955 if cachestart == 0 and cacheend == l:
956 956 return d # avoid a copy
957 957 return util.buffer(d, cachestart, cacheend - cachestart)
958 958
959 959 return self._loadchunk(offset, length)
960 960
961 961 def _chunkraw(self, startrev, endrev):
962 962 start = self.start(startrev)
963 963 end = self.end(endrev)
964 964 if self._inline:
965 965 start += (startrev + 1) * self._io.size
966 966 end += (endrev + 1) * self._io.size
967 967 length = end - start
968 968 return self._getchunk(start, length)
969 969
970 970 def _chunk(self, rev):
971 971 return decompress(self._chunkraw(rev, rev))
972 972
973 973 def _chunks(self, revs):
974 974 '''faster version of [self._chunk(rev) for rev in revs]
975 975
976 976 Assumes that revs is in ascending order.'''
977 977 if not revs:
978 978 return []
979 979 start = self.start
980 980 length = self.length
981 981 inline = self._inline
982 982 iosize = self._io.size
983 983 buffer = util.buffer
984 984
985 985 l = []
986 986 ladd = l.append
987 987
988 988 # preload the cache
989 989 try:
990 990 while True:
991 991 # ensure that the cache doesn't change out from under us
992 992 _cache = self._chunkcache
993 993 self._chunkraw(revs[0], revs[-1])
994 994 if _cache == self._chunkcache:
995 995 break
996 996 offset, data = _cache
997 997 except OverflowError:
998 998 # issue4215 - we can't cache a run of chunks greater than
999 999 # 2G on Windows
1000 1000 return [self._chunk(rev) for rev in revs]
1001 1001
1002 1002 for rev in revs:
1003 1003 chunkstart = start(rev)
1004 1004 if inline:
1005 1005 chunkstart += (rev + 1) * iosize
1006 1006 chunklength = length(rev)
1007 1007 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1008 1008
1009 1009 return l
1010 1010
1011 1011 def _chunkclear(self):
1012 1012 self._chunkcache = (0, '')
1013 1013
1014 1014 def deltaparent(self, rev):
1015 1015 """return deltaparent of the given revision"""
1016 1016 base = self.index[rev][3]
1017 1017 if base == rev:
1018 1018 return nullrev
1019 1019 elif self._generaldelta:
1020 1020 return base
1021 1021 else:
1022 1022 return rev - 1
1023 1023
1024 1024 def revdiff(self, rev1, rev2):
1025 1025 """return or calculate a delta between two revisions"""
1026 1026 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1027 1027 return str(self._chunk(rev2))
1028 1028
1029 1029 return mdiff.textdiff(self.revision(rev1),
1030 1030 self.revision(rev2))
1031 1031
1032 1032 def revision(self, nodeorrev):
1033 1033 """return an uncompressed revision of a given node or revision
1034 1034 number.
1035 1035 """
1036 1036 if isinstance(nodeorrev, int):
1037 1037 rev = nodeorrev
1038 1038 node = self.node(rev)
1039 1039 else:
1040 1040 node = nodeorrev
1041 1041 rev = None
1042 1042
1043 1043 _cache = self._cache # grab local copy of cache to avoid thread race
1044 1044 cachedrev = None
1045 1045 if node == nullid:
1046 1046 return ""
1047 1047 if _cache:
1048 1048 if _cache[0] == node:
1049 1049 return _cache[2]
1050 1050 cachedrev = _cache[1]
1051 1051
1052 1052 # look up what we need to read
1053 1053 text = None
1054 1054 if rev is None:
1055 1055 rev = self.rev(node)
1056 1056
1057 1057 # check rev flags
1058 1058 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1059 1059 raise RevlogError(_('incompatible revision flag %x') %
1060 1060 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1061 1061
1062 1062 # build delta chain
1063 1063 chain = []
1064 1064 index = self.index # for performance
1065 1065 generaldelta = self._generaldelta
1066 1066 iterrev = rev
1067 1067 e = index[iterrev]
1068 1068 while iterrev != e[3] and iterrev != cachedrev:
1069 1069 chain.append(iterrev)
1070 1070 if generaldelta:
1071 1071 iterrev = e[3]
1072 1072 else:
1073 1073 iterrev -= 1
1074 1074 e = index[iterrev]
1075 1075
1076 1076 if iterrev == cachedrev:
1077 1077 # cache hit
1078 1078 text = _cache[2]
1079 1079 else:
1080 1080 chain.append(iterrev)
1081 1081 chain.reverse()
1082 1082
1083 1083 # drop cache to save memory
1084 1084 self._cache = None
1085 1085
1086 1086 bins = self._chunks(chain)
1087 1087 if text is None:
1088 1088 text = str(bins[0])
1089 1089 bins = bins[1:]
1090 1090
1091 1091 text = mdiff.patches(text, bins)
1092 1092
1093 1093 text = self._checkhash(text, node, rev)
1094 1094
1095 1095 self._cache = (node, rev, text)
1096 1096 return text
1097 1097
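Stripped of caching and generaldelta, the reconstruction above amounts to: walk back from rev to its chain base, fetch every chunk, and patch the deltas onto the base's full text. A hedged sketch of that plain path:

    def reconstruct(rl, rev):
        # simplified: no cache reuse, no generaldelta chains
        chain = []
        iterrev = rev
        while iterrev != rl.index[iterrev][3]:   # stop at the chain base
            chain.append(iterrev)
            iterrev -= 1
        chain.append(iterrev)
        chain.reverse()
        bins = rl._chunks(chain)
        return mdiff.patches(str(bins[0]), bins[1:])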
1098 1098 def hash(self, text, p1, p2):
1099 1099 """Compute a node hash.
1100 1100
1101 1101 Available as a function so that subclasses can replace the hash
1102 1102 as needed.
1103 1103 """
1104 1104 return hash(text, p1, p2)
1105 1105
1106 1106 def _checkhash(self, text, node, rev):
1107 1107 p1, p2 = self.parents(node)
1108 1108 self.checkhash(text, p1, p2, node, rev)
1109 1109 return text
1110 1110
1111 1111 def checkhash(self, text, p1, p2, node, rev=None):
1112 1112 if node != self.hash(text, p1, p2):
1113 1113 revornode = rev
1114 1114 if revornode is None:
1115 1115 revornode = templatefilters.short(hex(node))
1116 1116 raise RevlogError(_("integrity check failed on %s:%s")
1117 1117 % (self.indexfile, revornode))
1118 1118
1119 1119 def checkinlinesize(self, tr, fp=None):
1120 1120 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1121 1121 return
1122 1122
1123 1123 trinfo = tr.find(self.indexfile)
1124 1124 if trinfo is None:
1125 1125 raise RevlogError(_("%s not found in the transaction")
1126 1126 % self.indexfile)
1127 1127
1128 1128 trindex = trinfo[2]
1129 1129 dataoff = self.start(trindex)
1130 1130
1131 1131 tr.add(self.datafile, dataoff)
1132 1132
1133 1133 if fp:
1134 1134 fp.flush()
1135 1135 fp.close()
1136 1136
1137 1137 df = self.opener(self.datafile, 'w')
1138 1138 try:
1139 1139 for r in self:
1140 1140 df.write(self._chunkraw(r, r))
1141 1141 finally:
1142 1142 df.close()
1143 1143
1144 1144 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1145 1145 self.version &= ~(REVLOGNGINLINEDATA)
1146 1146 self._inline = False
1147 1147 for i in self:
1148 1148 e = self._io.packentry(self.index[i], self.node, self.version, i)
1149 1149 fp.write(e)
1150 1150
1151 1151 # if we don't call close, the temp file will never replace the
1152 1152 # real index
1153 1153 fp.close()
1154 1154
1155 1155 tr.replace(self.indexfile, trindex * self._io.size)
1156 1156 self._chunkclear()
1157 1157
1158 1158 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1159 1159 node=None):
1160 1160 """add a revision to the log
1161 1161
1162 1162 text - the revision data to add
1163 1163 transaction - the transaction object used for rollback
1164 1164 link - the linkrev data to add
1165 1165 p1, p2 - the parent nodeids of the revision
1166 1166 cachedelta - an optional precomputed delta
1167 1167 node - nodeid of revision; typically node is not specified, and it is
1168 1168 computed by default as hash(text, p1, p2), however subclasses might
1169 1169 use different hashing method (and override checkhash() in such case)
1170 1170 """
1171 1171 if link == nullrev:
1172 1172 raise RevlogError(_("attempted to add linkrev -1 to %s")
1173 1173 % self.indexfile)
1174 1174 node = node or self.hash(text, p1, p2)
1175 1175 if node in self.nodemap:
1176 1176 return node
1177 1177
1178 1178 dfh = None
1179 1179 if not self._inline:
1180 1180 dfh = self.opener(self.datafile, "a")
1181 1181 ifh = self.opener(self.indexfile, "a+")
1182 1182 try:
1183 1183 return self._addrevision(node, text, transaction, link, p1, p2,
1184 1184 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1185 1185 finally:
1186 1186 if dfh:
1187 1187 dfh.close()
1188 1188 ifh.close()
1189 1189
1190 1190 def compress(self, text):
1191 1191 """ generate a possibly-compressed representation of text """
1192 1192 if not text:
1193 1193 return ("", text)
1194 1194 l = len(text)
1195 1195 bin = None
1196 1196 if l < 44:
1197 1197 pass
1198 1198 elif l > 1000000:
1199 1199 # zlib makes an internal copy, thus doubling memory usage for
1200 1200 # large files, so let's do this in pieces
1201 1201 z = zlib.compressobj()
1202 1202 p = []
1203 1203 pos = 0
1204 1204 while pos < l:
1205 1205 pos2 = pos + 2**20
1206 1206 p.append(z.compress(text[pos:pos2]))
1207 1207 pos = pos2
1208 1208 p.append(z.flush())
1209 1209 if sum(map(len, p)) < l:
1210 1210 bin = "".join(p)
1211 1211 else:
1212 1212 bin = _compress(text)
1213 1213 if bin is None or len(bin) > l:
1214 1214 if text[0] == '\0':
1215 1215 return ("", text)
1216 1216 return ('u', text)
1217 1217 return ("", bin)
1218 1218
1219 1219 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1220 1220 cachedelta, ifh, dfh):
1221 1221 """internal function to add revisions to the log
1222 1222
1223 1223 see addrevision for argument descriptions.
1224 1224 invariants:
1225 1225 - text is optional (can be None); if not set, cachedelta must be set.
1226 1226 if both are set, they must correspond to each other.
1227 1227 """
1228 1228 btext = [text]
1229 1229 def buildtext():
1230 1230 if btext[0] is not None:
1231 1231 return btext[0]
1232 1232 # flush any pending writes here so we can read it in revision
1233 1233 if dfh:
1234 1234 dfh.flush()
1235 1235 ifh.flush()
1236 1236 basetext = self.revision(self.node(cachedelta[0]))
1237 1237 btext[0] = mdiff.patch(basetext, cachedelta[1])
1238 1238 try:
1239 1239 self.checkhash(btext[0], p1, p2, node)
1240 1240 if flags & REVIDX_ISCENSORED:
1241 1241 raise RevlogError(_('node %s is not censored') % node)
1242 1242 except CensoredNodeError:
1243 1243 # must pass the censored index flag to add censored revisions
1244 1244 if not flags & REVIDX_ISCENSORED:
1245 1245 raise
1246 1246 return btext[0]
1247 1247
1248 1248 def builddelta(rev):
1249 1249 # can we use the cached delta?
1250 1250 if cachedelta and cachedelta[0] == rev:
1251 1251 delta = cachedelta[1]
1252 1252 else:
1253 1253 t = buildtext()
1254 1254 ptext = self.revision(self.node(rev))
1255 1255 delta = mdiff.textdiff(ptext, t)
1256 1256 data = self.compress(delta)
1257 1257 l = len(data[1]) + len(data[0])
1258 1258 if basecache[0] == rev:
1259 1259 chainbase = basecache[1]
1260 1260 else:
1261 1261 chainbase = self.chainbase(rev)
1262 1262 dist = l + offset - self.start(chainbase)
1263 1263 if self._generaldelta:
1264 1264 base = rev
1265 1265 else:
1266 1266 base = chainbase
1267 1267 chainlen, compresseddeltalen = self._chaininfo(rev)
1268 1268 chainlen += 1
1269 1269 compresseddeltalen += l
1270 1270 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1271 1271
1272 1272 curr = len(self)
1273 1273 prev = curr - 1
1274 1274 base = chainbase = curr
1275 1275 chainlen = None
1276 1276 offset = self.end(prev)
1277 1277 d = None
1278 1278 if self._basecache is None:
1279 1279 self._basecache = (prev, self.chainbase(prev))
1280 1280 basecache = self._basecache
1281 1281 p1r, p2r = self.rev(p1), self.rev(p2)
1282 1282
1283 1283 # should we try to build a delta?
1284 1284 if prev != nullrev:
1285 1285 if self._generaldelta:
1286 1286 if p1r >= basecache[1]:
1287 1287 d = builddelta(p1r)
1288 1288 elif p2r >= basecache[1]:
1289 1289 d = builddelta(p2r)
1290 1290 else:
1291 1291 d = builddelta(prev)
1292 1292 else:
1293 1293 d = builddelta(prev)
1294 1294 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1295 1295
1296 1296 # full versions are inserted when the needed deltas
1297 1297 # become comparable to the uncompressed text
1298 1298 if text is None:
1299 1299 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1300 1300 cachedelta[1])
1301 1301 else:
1302 1302 textlen = len(text)
1303 1303
1304 1304 # - 'dist' is the distance from the base revision -- bounding it limits
1305 1305 # the amount of I/O we need to do.
1306 1306 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1307 1307 # to apply -- bounding it limits the amount of CPU we consume.
1308 1308 if (d is None or dist > textlen * 4 or l > textlen or
1309 1309 compresseddeltalen > textlen * 2 or
1310 1310 (self._maxchainlen and chainlen > self._maxchainlen)):
1311 1311 text = buildtext()
1312 1312 data = self.compress(text)
1313 1313 l = len(data[1]) + len(data[0])
1314 1314 base = chainbase = curr
1315 1315
1316 1316 e = (offset_type(offset, flags), l, textlen,
1317 1317 base, link, p1r, p2r, node)
1318 1318 self.index.insert(-1, e)
1319 1319 self.nodemap[node] = curr
1320 1320
1321 1321 entry = self._io.packentry(e, self.node, self.version, curr)
1322 1322 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1323 1323
1324 1324 if type(text) == str: # only accept immutable objects
1325 1325 self._cache = (node, curr, text)
1326 1326 self._basecache = (curr, chainbase)
1327 1327 return node
1328 1328
1329 1329 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1330 1330 curr = len(self) - 1
1331 1331 if not self._inline:
1332 1332 transaction.add(self.datafile, offset)
1333 1333 transaction.add(self.indexfile, curr * len(entry))
1334 1334 if data[0]:
1335 1335 dfh.write(data[0])
1336 1336 dfh.write(data[1])
1337 1337 dfh.flush()
1338 1338 ifh.write(entry)
1339 1339 else:
1340 1340 offset += curr * self._io.size
1341 1341 transaction.add(self.indexfile, offset, curr)
1342 1342 ifh.write(entry)
1343 1343 ifh.write(data[0])
1344 1344 ifh.write(data[1])
1345 1345 self.checkinlinesize(transaction, ifh)
1346 1346
1347 1347 def addgroup(self, bundle, linkmapper, transaction):
1348 1348 """
1349 1349 add a delta group
1350 1350
1351 1351 given a set of deltas, add them to the revision log. the
1352 1352 first delta is against its parent, which should be in our
1353 1353 log, the rest are against the previous delta.
1354 1354 """
1355 1355
1356 1356 # track the base of the current delta log
1357 1357 content = []
1358 1358 node = None
1359 1359
1360 1360 r = len(self)
1361 1361 end = 0
1362 1362 if r:
1363 1363 end = self.end(r - 1)
1364 1364 ifh = self.opener(self.indexfile, "a+")
1365 1365 isize = r * self._io.size
1366 1366 if self._inline:
1367 1367 transaction.add(self.indexfile, end + isize, r)
1368 1368 dfh = None
1369 1369 else:
1370 1370 transaction.add(self.indexfile, isize, r)
1371 1371 transaction.add(self.datafile, end)
1372 1372 dfh = self.opener(self.datafile, "a")
1373 1373
1374 1374 try:
1375 1375 # loop through our set of deltas
1376 1376 chain = None
1377 1377 while True:
1378 1378 chunkdata = bundle.deltachunk(chain)
1379 1379 if not chunkdata:
1380 1380 break
1381 1381 node = chunkdata['node']
1382 1382 p1 = chunkdata['p1']
1383 1383 p2 = chunkdata['p2']
1384 1384 cs = chunkdata['cs']
1385 1385 deltabase = chunkdata['deltabase']
1386 1386 delta = chunkdata['delta']
1387 1387
1388 1388 content.append(node)
1389 1389
1390 1390 link = linkmapper(cs)
1391 1391 if node in self.nodemap:
1392 1392 # this can happen if two branches make the same change
1393 1393 chain = node
1394 1394 continue
1395 1395
1396 1396 for p in (p1, p2):
1397 1397 if p not in self.nodemap:
1398 1398 raise LookupError(p, self.indexfile,
1399 1399 _('unknown parent'))
1400 1400
1401 1401 if deltabase not in self.nodemap:
1402 1402 raise LookupError(deltabase, self.indexfile,
1403 1403 _('unknown delta base'))
1404 1404
1405 1405 baserev = self.rev(deltabase)
1406 1406 chain = self._addrevision(node, None, transaction, link,
1407 1407 p1, p2, REVIDX_DEFAULT_FLAGS,
1408 1408 (baserev, delta), ifh, dfh)
1409 1409 if not dfh and not self._inline:
1410 1410 # addrevision switched from inline to conventional
1411 1411 # reopen the index
1412 1412 ifh.close()
1413 1413 dfh = self.opener(self.datafile, "a")
1414 1414 ifh = self.opener(self.indexfile, "a")
1415 1415 finally:
1416 1416 if dfh:
1417 1417 dfh.close()
1418 1418 ifh.close()
1419 1419
1420 1420 return content
1421 1421
1422 def iscensored(self, rev):
1423 """Check if a file revision is censored."""
1424 return False
1425
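The base implementation answers False so that every revlog supports the query; only subclasses that actually store the censor flag, such as filelog above, override it. Any code holding a revlog can now ask uniformly, as in this hedged helper:

    def censoredrevs(rl):
        # works on changelog, manifest and filelog alike, because
        # iscensored() is now part of the shared revlog API
        return set(rev for rev in rl if rl.iscensored(rev))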
1422 1426 def getstrippoint(self, minlink):
1423 1427 """find the minimum rev that must be stripped to strip the linkrev
1424 1428
1425 1429 Returns a tuple containing the minimum rev and a set of all revs that
1426 1430 have linkrevs that will be broken by this strip.
1427 1431 """
1428 1432 brokenrevs = set()
1429 1433 strippoint = len(self)
1430 1434
1431 1435 heads = {}
1432 1436 futurelargelinkrevs = set()
1433 1437 for head in self.headrevs():
1434 1438 headlinkrev = self.linkrev(head)
1435 1439 heads[head] = headlinkrev
1436 1440 if headlinkrev >= minlink:
1437 1441 futurelargelinkrevs.add(headlinkrev)
1438 1442
1439 1443 # This algorithm involves walking down the rev graph, starting at the
1440 1444 # heads. Since the revs are topologically sorted according to linkrev,
1441 1445 # once all head linkrevs are below the minlink, we know there are
1442 1446 # no more revs that could have a linkrev greater than minlink.
1443 1447 # So we can stop walking.
1444 1448 while futurelargelinkrevs:
1445 1449 strippoint -= 1
1446 1450 linkrev = heads.pop(strippoint)
1447 1451
1448 1452 if linkrev < minlink:
1449 1453 brokenrevs.add(strippoint)
1450 1454 else:
1451 1455 futurelargelinkrevs.remove(linkrev)
1452 1456
1453 1457 for p in self.parentrevs(strippoint):
1454 1458 if p != nullrev:
1455 1459 plinkrev = self.linkrev(p)
1456 1460 heads[p] = plinkrev
1457 1461 if plinkrev >= minlink:
1458 1462 futurelargelinkrevs.add(plinkrev)
1459 1463
1460 1464 return strippoint, brokenrevs
1461 1465
1462 1466 def strip(self, minlink, transaction):
1463 1467 """truncate the revlog on the first revision with a linkrev >= minlink
1464 1468
1465 1469 This function is called when we're stripping revision minlink and
1466 1470 its descendants from the repository.
1467 1471
1468 1472 We have to remove all revisions with linkrev >= minlink, because
1469 1473 the equivalent changelog revisions will be renumbered after the
1470 1474 strip.
1471 1475
1472 1476 So we truncate the revlog on the first of these revisions, and
1473 1477 trust that the caller has saved the revisions that shouldn't be
1474 1478 removed and that it'll re-add them after this truncation.
1475 1479 """
1476 1480 if len(self) == 0:
1477 1481 return
1478 1482
1479 1483 rev, _ = self.getstrippoint(minlink)
1480 1484 if rev == len(self):
1481 1485 return
1482 1486
1483 1487 # first truncate the files on disk
1484 1488 end = self.start(rev)
1485 1489 if not self._inline:
1486 1490 transaction.add(self.datafile, end)
1487 1491 end = rev * self._io.size
1488 1492 else:
1489 1493 end += rev * self._io.size
1490 1494
1491 1495 transaction.add(self.indexfile, end)
1492 1496
1493 1497 # then reset internal state in memory to forget those revisions
1494 1498 self._cache = None
1495 1499 self._chaininfocache = {}
1496 1500 self._chunkclear()
1497 1501 for x in xrange(rev, len(self)):
1498 1502 del self.nodemap[self.node(x)]
1499 1503
1500 1504 del self.index[rev:-1]
1501 1505
1502 1506 def checksize(self):
1503 1507 expected = 0
1504 1508 if len(self):
1505 1509 expected = max(0, self.end(len(self) - 1))
1506 1510
1507 1511 try:
1508 1512 f = self.opener(self.datafile)
1509 1513 f.seek(0, 2)
1510 1514 actual = f.tell()
1511 1515 f.close()
1512 1516 dd = actual - expected
1513 1517 except IOError, inst:
1514 1518 if inst.errno != errno.ENOENT:
1515 1519 raise
1516 1520 dd = 0
1517 1521
1518 1522 try:
1519 1523 f = self.opener(self.indexfile)
1520 1524 f.seek(0, 2)
1521 1525 actual = f.tell()
1522 1526 f.close()
1523 1527 s = self._io.size
1524 1528 i = max(0, actual // s)
1525 1529 di = actual - (i * s)
1526 1530 if self._inline:
1527 1531 databytes = 0
1528 1532 for r in self:
1529 1533 databytes += max(0, self.length(r))
1530 1534 dd = 0
1531 1535 di = actual - len(self) * s - databytes
1532 1536 except IOError, inst:
1533 1537 if inst.errno != errno.ENOENT:
1534 1538 raise
1535 1539 di = 0
1536 1540
1537 1541 return (dd, di)
1538 1542
1539 1543 def files(self):
1540 1544 res = [self.indexfile]
1541 1545 if not self._inline:
1542 1546 res.append(self.datafile)
1543 1547 return res
@@ -1,236 +1,242 @@ mercurial/unionrepo.py
1 1 # unionrepo.py - repository class for viewing union of repository changesets
2 2 #
3 3 # Derived from bundlerepo.py
4 4 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
5 5 # Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Repository class for "in-memory pull" of one local repository to another,
11 11 allowing operations like diff and log with revsets.
12 12 """
13 13
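For orientation: the usual entry point is the "union:" URL scheme handled by instance() at the bottom of this file, e.g. "hg log -R union:repo1+repo2" (paths illustrative), which presents the history of both repositories through the read-only revlog subclasses below.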
14 14 from node import nullid
15 15 from i18n import _
16 16 import os
17 17 import util, mdiff, cmdutil, scmutil
18 18 import localrepo, changelog, manifest, filelog, revlog
19 19
20 20 class unionrevlog(revlog.revlog):
21 21 def __init__(self, opener, indexfile, revlog2, linkmapper):
22 22 # How it works:
23 23 # To retrieve a revision, we just need to know the node id so we can
24 24 # look it up in revlog2.
25 25 #
26 26 # To differentiate a rev in the second revlog from a rev in the base
27 27 # revlog, we compare the revision number against repotiprev.
28 28 opener = scmutil.readonlyvfs(opener)
29 29 revlog.revlog.__init__(self, opener, indexfile)
30 30 self.revlog2 = revlog2
31 31
32 32 n = len(self)
33 33 self.repotiprev = n - 1
34 34 self.bundlerevs = set() # used by 'bundle()' revset expression
35 35 for rev2 in self.revlog2:
36 36 rev = self.revlog2.index[rev2]
37 37 # rev numbers here are revlog2's, and differ from rev numbers in self
38 38 _start, _csize, _rsize, _base, linkrev, p1rev, p2rev, node = rev
39 39
40 40 if linkmapper is None: # link is to same revlog
41 41 assert linkrev == rev2 # we never link back
42 42 link = n
43 43 else: # rev must be mapped from repo2 cl to unified cl by linkmapper
44 44 link = linkmapper(linkrev)
45 45
46 46 if node in self.nodemap:
47 47 # this happens for the common revlog revisions
48 48 self.bundlerevs.add(self.nodemap[node])
49 49 continue
50 50
51 51 p1node = self.revlog2.node(p1rev)
52 52 p2node = self.revlog2.node(p2rev)
53 53
54 54 e = (None, None, None, None,
55 55 link, self.rev(p1node), self.rev(p2node), node)
56 56 self.index.insert(-1, e)
57 57 self.nodemap[node] = n
58 58 self.bundlerevs.add(n)
59 59 n += 1
60 60
61 61 def _chunk(self, rev):
62 62 if rev <= self.repotiprev:
63 63 return revlog.revlog._chunk(self, rev)
64 64 return self.revlog2._chunk(self.node(rev))
65 65
66 66 def revdiff(self, rev1, rev2):
67 67 """return or calculate a delta between two revisions"""
68 68 if rev1 > self.repotiprev and rev2 > self.repotiprev:
69 69 return self.revlog2.revdiff(
70 70 self.revlog2.rev(self.node(rev1)),
71 71 self.revlog2.rev(self.node(rev2)))
72 72 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
73 73 return self.baserevdiff(rev1, rev2)
74 74
75 75 return mdiff.textdiff(self.revision(self.node(rev1)),
76 76 self.revision(self.node(rev2)))
77 77
78 78 def revision(self, nodeorrev):
79 79 """return an uncompressed revision of a given node or revision
80 80 number.
81 81 """
82 82 if isinstance(nodeorrev, int):
83 83 rev = nodeorrev
84 84 node = self.node(rev)
85 85 else:
86 86 node = nodeorrev
87 87 rev = self.rev(node)
88 88
89 89 if node == nullid:
90 90 return ""
91 91
92 92 if rev > self.repotiprev:
93 93 text = self.revlog2.revision(node)
94 94 self._cache = (node, rev, text)
95 95 else:
96 96 text = self.baserevision(rev)
97 97 # already cached
98 98 return text
99 99
100 100 def baserevision(self, nodeorrev):
101 101 # Revlog subclasses may override 'revision' method to modify format of
102 102 # content retrieved from revlog. To use unionrevlog with such class one
103 103 # needs to override 'baserevision' and make more specific call here.
104 104 return revlog.revlog.revision(self, nodeorrev)
105 105
106 106 def baserevdiff(self, rev1, rev2):
107 107 # Exists for the same purpose as baserevision.
108 108 return revlog.revlog.revdiff(self, rev1, rev2)
109 109
110 110 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
111 111 raise NotImplementedError
112 112 def addgroup(self, revs, linkmapper, transaction):
113 113 raise NotImplementedError
114 114 def strip(self, rev, minlink):
115 115 raise NotImplementedError
116 116 def checksize(self):
117 117 raise NotImplementedError
118 118
119 119 class unionchangelog(unionrevlog, changelog.changelog):
120 120 def __init__(self, opener, opener2):
121 121 changelog.changelog.__init__(self, opener)
122 122 linkmapper = None
123 123 changelog2 = changelog.changelog(opener2)
124 124 unionrevlog.__init__(self, opener, self.indexfile, changelog2,
125 125 linkmapper)
126 126
127 127 def baserevision(self, nodeorrev):
128 128 # Although changelog doesn't override the 'revision' method, some
129 129 # extensions may replace this class with another that does. The same
130 130 # applies to the manifest and filelog classes.
131 131 return changelog.changelog.revision(self, nodeorrev)
132 132
133 133 def baserevdiff(self, rev1, rev2):
134 134 return changelog.changelog.revdiff(self, rev1, rev2)
135 135
136 136 class unionmanifest(unionrevlog, manifest.manifest):
137 137 def __init__(self, opener, opener2, linkmapper):
138 138 manifest.manifest.__init__(self, opener)
139 139 manifest2 = manifest.manifest(opener2)
140 140 unionrevlog.__init__(self, opener, self.indexfile, manifest2,
141 141 linkmapper)
142 142
143 143 def baserevision(self, nodeorrev):
144 144 return manifest.manifest.revision(self, nodeorrev)
145 145
146 146 def baserevdiff(self, rev1, rev2):
147 147 return manifest.manifest.revdiff(self, rev1, rev2)
148 148
149 149 class unionfilelog(unionrevlog, filelog.filelog):
150 150 def __init__(self, opener, path, opener2, linkmapper, repo):
151 151 filelog.filelog.__init__(self, opener, path)
152 152 filelog2 = filelog.filelog(opener2, path)
153 153 unionrevlog.__init__(self, opener, self.indexfile, filelog2,
154 154 linkmapper)
155 155 self._repo = repo
156 156
157 157 def baserevision(self, nodeorrev):
158 158 return filelog.filelog.revision(self, nodeorrev)
159 159
160 160 def baserevdiff(self, rev1, rev2):
161 161 return filelog.filelog.revdiff(self, rev1, rev2)
162 162
163 def iscensored(self, rev):
164 """Check if a revision is censored."""
165 if rev <= self.repotiprev:
166 return filelog.filelog.iscensored(self, rev)
167 return self.revlog2.iscensored(rev)
168
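The new override gives unionfilelog the same two-way dispatch as _chunk() and revdiff(): censorship of a shared revision is answered by the base filelog, censorship of a revision that exists only in the second repository by revlog2. A hedged usage sketch (fl is assumed to be a unionfilelog; revlogs iterate over their revision numbers):

def uncensoredrevs(fl):
    return [r for r in fl if not fl.iscensored(r)]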
163 169 class unionpeer(localrepo.localpeer):
164 170 def canpush(self):
165 171 return False
166 172
167 173 class unionrepository(localrepo.localrepository):
168 174 def __init__(self, ui, path, path2):
169 175 localrepo.localrepository.__init__(self, ui, path)
170 176 self.ui.setconfig('phases', 'publish', False, 'unionrepo')
171 177
172 178 self._url = 'union:%s+%s' % (util.expandpath(path),
173 179 util.expandpath(path2))
174 180 self.repo2 = localrepo.localrepository(ui, path2)
175 181
176 182 @localrepo.unfilteredpropertycache
177 183 def changelog(self):
178 184 return unionchangelog(self.svfs, self.repo2.svfs)
179 185
180 186 def _clrev(self, rev2):
181 187 """map from repo2 changelog rev to temporary rev in self.changelog"""
182 188 node = self.repo2.changelog.node(rev2)
183 189 return self.changelog.rev(node)
184 190
185 191 @localrepo.unfilteredpropertycache
186 192 def manifest(self):
187 193 return unionmanifest(self.svfs, self.repo2.svfs,
188 194 self._clrev)
189 195
190 196 def url(self):
191 197 return self._url
192 198
193 199 def file(self, f):
194 200 return unionfilelog(self.svfs, f, self.repo2.svfs,
195 201 self._clrev, self)
196 202
197 203 def close(self):
198 204 self.repo2.close()
199 205
200 206 def cancopy(self):
201 207 return False
202 208
203 209 def peer(self):
204 210 return unionpeer(self)
205 211
206 212 def getcwd(self):
207 213 return os.getcwd() # always outside the repo
208 214
209 215 def instance(ui, path, create):
210 216 if create:
211 217 raise util.Abort(_('cannot create new union repository'))
212 218 parentpath = ui.config("bundle", "mainreporoot", "")
213 219 if not parentpath:
214 220 # try to find the correct path to the working directory repo
215 221 parentpath = cmdutil.findrepo(os.getcwd())
216 222 if parentpath is None:
217 223 parentpath = ''
218 224 if parentpath:
219 225 # Try to make the full path relative so we get a nice, short URL.
220 226 # In particular, we don't want temp dir names in test outputs.
221 227 cwd = os.getcwd()
222 228 if parentpath == cwd:
223 229 parentpath = ''
224 230 else:
225 231 cwd = os.path.join(cwd,'')
226 232 if parentpath.startswith(cwd):
227 233 parentpath = parentpath[len(cwd):]
228 234 if path.startswith('union:'):
229 235 s = path.split(":", 1)[1].split("+", 1)
230 236 if len(s) == 1:
231 237 repopath, repopath2 = parentpath, s[0]
232 238 else:
233 239 repopath, repopath2 = s
234 240 else:
235 241 repopath, repopath2 = parentpath, path
236 242 return unionrepository(ui, repopath, repopath2)
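A runnable trace of the union path parsing above, with hypothetical paths:

for path in ('union:/a/repo1+/a/repo2', 'union:/a/repo2'):
    s = path.split(":", 1)[1].split("+", 1)
    if len(s) == 1:
        print("repopath=<parentpath> repopath2=%s" % s[0])
    else:
        print("repopath=%s repopath2=%s" % (s[0], s[1]))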