strip: add faster revlog strip computation...
Durham Goode
r20074:5fc2ae1c default
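In outline, this commit moves the broken-changeset computation from repair.py's per-revision scan into a new `revlog.getstrippoint` method that walks down from the revlog's heads and stops early. A minimal sketch of the call pattern the diff introduces, with `rl` and `minlink` as illustrative placeholders:

```python
# Hedged sketch of the new call pattern; 'rl' stands in for any
# revlog-like object (changelog, manifest, or a filelog).
strippoint, brokenset = rl.getstrippoint(minlink)
# strippoint: first local rev to truncate (== len(rl) if nothing to strip)
# brokenset: revs at or above strippoint whose linkrevs are < minlink,
# i.e. surviving changesets that lose data here and must be restored
# from a backup bundle after the truncation
```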
@@ -1,184 +1,176 @@
1 1 # repair.py - functions for repository repair for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
4 4 # Copyright 2007 Matt Mackall
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from mercurial import changegroup
10 10 from mercurial.node import short
11 11 from mercurial.i18n import _
12 12 import os
13 13 import errno
14 14
15 15 def _bundle(repo, bases, heads, node, suffix, compress=True):
16 16 """create a bundle with the specified revisions as a backup"""
17 17 cg = repo.changegroupsubset(bases, heads, 'strip')
18 18 backupdir = repo.join("strip-backup")
19 19 if not os.path.isdir(backupdir):
20 20 os.mkdir(backupdir)
21 21 name = os.path.join(backupdir, "%s-%s.hg" % (short(node), suffix))
22 22 if compress:
23 23 bundletype = "HG10BZ"
24 24 else:
25 25 bundletype = "HG10UN"
26 26 return changegroup.writebundle(cg, name, bundletype)
27 27
28 28 def _collectfiles(repo, striprev):
29 29 """find out the filelogs affected by the strip"""
30 30 files = set()
31 31
32 32 for x in xrange(striprev, len(repo)):
33 33 files.update(repo[x].files())
34 34
35 35 return sorted(files)
36 36
37 37 def _collectbrokencsets(repo, files, striprev):
38 38 """return the changesets which will be broken by the truncation"""
39 39 s = set()
40 40 def collectone(revlog):
41 linkgen = (revlog.linkrev(i) for i in revlog)
42 # find the truncation point of the revlog
43 for lrev in linkgen:
44 if lrev >= striprev:
45 break
46 # see if any revision after this point has a linkrev
47 # less than striprev (those will be broken by strip)
48 for lrev in linkgen:
49 if lrev < striprev:
50 s.add(lrev)
41 _, brokenset = revlog.getstrippoint(striprev)
42 s.update([revlog.linkrev(r) for r in brokenset])
51 43
52 44 collectone(repo.manifest)
53 45 for fname in files:
54 46 collectone(repo.file(fname))
55 47
56 48 return s
57 49
58 50 def strip(ui, repo, nodelist, backup="all", topic='backup'):
59 51 repo = repo.unfiltered()
60 52 repo.destroying()
61 53
62 54 cl = repo.changelog
63 55 # TODO handle undo of merge sets
64 56 if isinstance(nodelist, str):
65 57 nodelist = [nodelist]
66 58 striplist = [cl.rev(node) for node in nodelist]
67 59 striprev = min(striplist)
68 60
69 61 keeppartialbundle = backup == 'strip'
70 62
71 63 # Some revisions with rev > striprev may not be descendants of striprev.
72 64 # We have to find these revisions and put them in a bundle, so that
73 65 # we can restore them after the truncations.
74 66 # To create the bundle we use repo.changegroupsubset which requires
75 67 # the list of heads and bases of the set of interesting revisions.
76 68 # (head = revision in the set that has no descendant in the set;
77 69 # base = revision in the set that has no ancestor in the set)
78 70 tostrip = set(striplist)
79 71 for rev in striplist:
80 72 for desc in cl.descendants([rev]):
81 73 tostrip.add(desc)
82 74
83 75 files = _collectfiles(repo, striprev)
84 76 saverevs = _collectbrokencsets(repo, files, striprev)
85 77
86 78 # compute heads
87 79 saveheads = set(saverevs)
88 80 for r in xrange(striprev + 1, len(cl)):
89 81 if r not in tostrip:
90 82 saverevs.add(r)
91 83 saveheads.difference_update(cl.parentrevs(r))
92 84 saveheads.add(r)
93 85 saveheads = [cl.node(r) for r in saveheads]
94 86
95 87 # compute base nodes
96 88 if saverevs:
97 89 descendants = set(cl.descendants(saverevs))
98 90 saverevs.difference_update(descendants)
99 91 savebases = [cl.node(r) for r in saverevs]
100 92 stripbases = [cl.node(r) for r in tostrip]
101 93
102 94 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)), but
103 95 # is much faster
104 96 newbmtarget = repo.revs('max(parents(%ld) - (%ld))', tostrip, tostrip)
105 97 if newbmtarget:
106 98 newbmtarget = repo[newbmtarget[0]].node()
107 99 else:
108 100 newbmtarget = '.'
109 101
110 102 bm = repo._bookmarks
111 103 updatebm = []
112 104 for m in bm:
113 105 rev = repo[bm[m]].rev()
114 106 if rev in tostrip:
115 107 updatebm.append(m)
116 108
117 109 # create a changegroup for all the branches we need to keep
118 110 backupfile = None
119 111 if backup == "all":
120 112 backupfile = _bundle(repo, stripbases, cl.heads(), node, topic)
121 113 repo.ui.status(_("saved backup bundle to %s\n") % backupfile)
122 114 repo.ui.log("backupbundle", "saved backup bundle to %s\n", backupfile)
123 115 if saveheads or savebases:
124 116 # do not compress partial bundle if we remove it from disk later
125 117 chgrpfile = _bundle(repo, savebases, saveheads, node, 'temp',
126 118 compress=keeppartialbundle)
127 119
128 120 mfst = repo.manifest
129 121
130 122 tr = repo.transaction("strip")
131 123 offset = len(tr.entries)
132 124
133 125 try:
134 126 tr.startgroup()
135 127 cl.strip(striprev, tr)
136 128 mfst.strip(striprev, tr)
137 129 for fn in files:
138 130 repo.file(fn).strip(striprev, tr)
139 131 tr.endgroup()
140 132
141 133 try:
142 134 for i in xrange(offset, len(tr.entries)):
143 135 file, troffset, ignore = tr.entries[i]
144 136 repo.sopener(file, 'a').truncate(troffset)
145 137 tr.close()
146 138 except: # re-raises
147 139 tr.abort()
148 140 raise
149 141
150 142 if saveheads or savebases:
151 143 ui.note(_("adding branch\n"))
152 144 f = open(chgrpfile, "rb")
153 145 gen = changegroup.readbundle(f, chgrpfile)
154 146 if not repo.ui.verbose:
155 147 # silence internal shuffling chatter
156 148 repo.ui.pushbuffer()
157 149 repo.addchangegroup(gen, 'strip', 'bundle:' + chgrpfile, True)
158 150 if not repo.ui.verbose:
159 151 repo.ui.popbuffer()
160 152 f.close()
161 153 if not keeppartialbundle:
162 154 os.unlink(chgrpfile)
163 155
164 156 # remove undo files
165 157 for undofile in repo.undofiles():
166 158 try:
167 159 os.unlink(undofile)
168 160 except OSError, e:
169 161 if e.errno != errno.ENOENT:
170 162 ui.warn(_('error removing %s: %s\n') % (undofile, str(e)))
171 163
172 164 for m in updatebm:
173 165 bm[m] = repo[newbmtarget].node()
174 166 bm.write()
175 167 except: # re-raises
176 168 if backupfile:
177 169 ui.warn(_("strip failed, full bundle stored in '%s'\n")
178 170 % backupfile)
179 171 elif saveheads:
180 172 ui.warn(_("strip failed, partial bundle stored in '%s'\n")
181 173 % chgrpfile)
182 174 raise
183 175
184 176 repo.destroyed()
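For reference, the `_collectbrokencsets` hunk above condenses to the following before/after. This is a paraphrase of the diff, not additional API; `revlog` is whatever `repo.manifest` or `repo.file(fname)` returns:

```python
def collectone_old(revlog, striprev, s):
    # before: read every linkrev in index order, O(len(revlog)) even
    # when only the tip is being stripped
    linkgen = (revlog.linkrev(i) for i in revlog)
    # find the truncation point of the revlog
    for lrev in linkgen:
        if lrev >= striprev:
            break
    # any later revision with a linkrev below striprev will be broken
    for lrev in linkgen:
        if lrev < striprev:
            s.add(lrev)

def collectone_new(revlog, striprev, s):
    # after: the revlog walks down from its heads and stops as soon as
    # no remaining head linkrev can reach striprev
    _, brokenset = revlog.getstrippoint(striprev)
    s.update([revlog.linkrev(r) for r in brokenset])
```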
@@ -1,1370 +1,1408 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
78 78 def decompress(bin):
79 79 """ decompress the given input """
80 80 if not bin:
81 81 return bin
82 82 t = bin[0]
83 83 if t == '\0':
84 84 return bin
85 85 if t == 'x':
86 86 try:
87 87 return _decompress(bin)
88 88 except zlib.error, e:
89 89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 90 if t == 'u':
91 91 return bin[1:]
92 92 raise RevlogError(_("unknown compression type %r") % t)
93 93
94 94 # index v0:
95 95 # 4 bytes: offset
96 96 # 4 bytes: compressed length
97 97 # 4 bytes: base rev
98 98 # 4 bytes: link rev
99 99 # 32 bytes: parent 1 nodeid
100 100 # 32 bytes: parent 2 nodeid
101 101 # 32 bytes: nodeid
102 102 indexformatv0 = ">4l20s20s20s"
103 103 v0shaoffset = 56
104 104
105 105 class revlogoldio(object):
106 106 def __init__(self):
107 107 self.size = struct.calcsize(indexformatv0)
108 108
109 109 def parseindex(self, data, inline):
110 110 s = self.size
111 111 index = []
112 112 nodemap = {nullid: nullrev}
113 113 n = off = 0
114 114 l = len(data)
115 115 while off + s <= l:
116 116 cur = data[off:off + s]
117 117 off += s
118 118 e = _unpack(indexformatv0, cur)
119 119 # transform to revlogv1 format
120 120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 122 index.append(e2)
123 123 nodemap[e[6]] = n
124 124 n += 1
125 125
126 126 # add the magic null revision at -1
127 127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128 128
129 129 return index, nodemap, None
130 130
131 131 def packentry(self, entry, node, version, rev):
132 132 if gettype(entry[0]):
133 133 raise RevlogError(_("index entry flags need RevlogNG"))
134 134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 135 node(entry[5]), node(entry[6]), entry[7])
136 136 return _pack(indexformatv0, *e2)
137 137
138 138 # index ng:
139 139 # 6 bytes: offset
140 140 # 2 bytes: flags
141 141 # 4 bytes: compressed length
142 142 # 4 bytes: uncompressed length
143 143 # 4 bytes: base rev
144 144 # 4 bytes: link rev
145 145 # 4 bytes: parent 1 rev
146 146 # 4 bytes: parent 2 rev
147 147 # 32 bytes: nodeid
148 148 indexformatng = ">Qiiiiii20s12x"
149 149 ngshaoffset = 32
150 150 versionformat = ">I"
151 151
152 152 class revlogio(object):
153 153 def __init__(self):
154 154 self.size = struct.calcsize(indexformatng)
155 155
156 156 def parseindex(self, data, inline):
157 157 # call the C implementation to parse the index data
158 158 index, cache = parsers.parse_index2(data, inline)
159 159 return index, getattr(index, 'nodemap', None), cache
160 160
161 161 def packentry(self, entry, node, version, rev):
162 162 p = _pack(indexformatng, *entry)
163 163 if rev == 0:
164 164 p = _pack(versionformat, version) + p[4:]
165 165 return p
166 166
167 167 class revlog(object):
168 168 """
169 169 the underlying revision storage object
170 170
171 171 A revlog consists of two parts, an index and the revision data.
172 172
173 173 The index is a file with a fixed record size containing
174 174 information on each revision, including its nodeid (hash), the
175 175 nodeids of its parents, the position and offset of its data within
176 176 the data file, and the revision it's based on. Finally, each entry
177 177 contains a linkrev entry that can serve as a pointer to external
178 178 data.
179 179
180 180 The revision data itself is a linear collection of data chunks.
181 181 Each chunk represents a revision and is usually represented as a
182 182 delta against the previous chunk. To bound lookup time, runs of
183 183 deltas are limited to about 2 times the length of the original
184 184 version data. This makes retrieval of a version proportional to
185 185 its size, or O(1) relative to the number of revisions.
186 186
187 187 Both pieces of the revlog are written to in an append-only
188 188 fashion, which means we never need to rewrite a file to insert or
189 189 remove data, and can use some simple techniques to avoid the need
190 190 for locking while reading.
191 191 """
192 192 def __init__(self, opener, indexfile):
193 193 """
194 194 create a revlog object
195 195
196 196 opener is a function that abstracts the file opening operation
197 197 and can be used to implement COW semantics or the like.
198 198 """
199 199 self.indexfile = indexfile
200 200 self.datafile = indexfile[:-2] + ".d"
201 201 self.opener = opener
202 202 self._cache = None
203 203 self._basecache = None
204 204 self._chunkcache = (0, '')
205 205 self.index = []
206 206 self._pcache = {}
207 207 self._nodecache = {nullid: nullrev}
208 208 self._nodepos = None
209 209
210 210 v = REVLOG_DEFAULT_VERSION
211 211 opts = getattr(opener, 'options', None)
212 212 if opts is not None:
213 213 if 'revlogv1' in opts:
214 214 if 'generaldelta' in opts:
215 215 v |= REVLOGGENERALDELTA
216 216 else:
217 217 v = 0
218 218
219 219 i = ''
220 220 self._initempty = True
221 221 try:
222 222 f = self.opener(self.indexfile)
223 223 i = f.read()
224 224 f.close()
225 225 if len(i) > 0:
226 226 v = struct.unpack(versionformat, i[:4])[0]
227 227 self._initempty = False
228 228 except IOError, inst:
229 229 if inst.errno != errno.ENOENT:
230 230 raise
231 231
232 232 self.version = v
233 233 self._inline = v & REVLOGNGINLINEDATA
234 234 self._generaldelta = v & REVLOGGENERALDELTA
235 235 flags = v & ~0xFFFF
236 236 fmt = v & 0xFFFF
237 237 if fmt == REVLOGV0 and flags:
238 238 raise RevlogError(_("index %s unknown flags %#04x for format v0")
239 239 % (self.indexfile, flags >> 16))
240 240 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
241 241 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
242 242 % (self.indexfile, flags >> 16))
243 243 elif fmt > REVLOGNG:
244 244 raise RevlogError(_("index %s unknown format %d")
245 245 % (self.indexfile, fmt))
246 246
247 247 self._io = revlogio()
248 248 if self.version == REVLOGV0:
249 249 self._io = revlogoldio()
250 250 try:
251 251 d = self._io.parseindex(i, self._inline)
252 252 except (ValueError, IndexError):
253 253 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
254 254 self.index, nodemap, self._chunkcache = d
255 255 if nodemap is not None:
256 256 self.nodemap = self._nodecache = nodemap
257 257 if not self._chunkcache:
258 258 self._chunkclear()
259 259
260 260 def tip(self):
261 261 return self.node(len(self.index) - 2)
262 262 def __len__(self):
263 263 return len(self.index) - 1
264 264 def __iter__(self):
265 265 return iter(xrange(len(self)))
266 266 def revs(self, start=0, stop=None):
267 267 """iterate over all rev in this revlog (from start to stop)"""
268 268 step = 1
269 269 if stop is not None:
270 270 if start > stop:
271 271 step = -1
272 272 stop += step
273 273 else:
274 274 stop = len(self)
275 275 return xrange(start, stop, step)
276 276
277 277 @util.propertycache
278 278 def nodemap(self):
279 279 self.rev(self.node(0))
280 280 return self._nodecache
281 281
282 282 def hasnode(self, node):
283 283 try:
284 284 self.rev(node)
285 285 return True
286 286 except KeyError:
287 287 return False
288 288
289 289 def clearcaches(self):
290 290 try:
291 291 self._nodecache.clearcaches()
292 292 except AttributeError:
293 293 self._nodecache = {nullid: nullrev}
294 294 self._nodepos = None
295 295
296 296 def rev(self, node):
297 297 try:
298 298 return self._nodecache[node]
299 299 except RevlogError:
300 300 # parsers.c radix tree lookup failed
301 301 raise LookupError(node, self.indexfile, _('no node'))
302 302 except KeyError:
303 303 # pure python cache lookup failed
304 304 n = self._nodecache
305 305 i = self.index
306 306 p = self._nodepos
307 307 if p is None:
308 308 p = len(i) - 2
309 309 for r in xrange(p, -1, -1):
310 310 v = i[r][7]
311 311 n[v] = r
312 312 if v == node:
313 313 self._nodepos = r - 1
314 314 return r
315 315 raise LookupError(node, self.indexfile, _('no node'))
316 316
317 317 def node(self, rev):
318 318 return self.index[rev][7]
319 319 def linkrev(self, rev):
320 320 return self.index[rev][4]
321 321 def parents(self, node):
322 322 i = self.index
323 323 d = i[self.rev(node)]
324 324 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
325 325 def parentrevs(self, rev):
326 326 return self.index[rev][5:7]
327 327 def start(self, rev):
328 328 return int(self.index[rev][0] >> 16)
329 329 def end(self, rev):
330 330 return self.start(rev) + self.length(rev)
331 331 def length(self, rev):
332 332 return self.index[rev][1]
333 333 def chainbase(self, rev):
334 334 index = self.index
335 335 base = index[rev][3]
336 336 while base != rev:
337 337 rev = base
338 338 base = index[rev][3]
339 339 return base
340 340 def flags(self, rev):
341 341 return self.index[rev][0] & 0xFFFF
342 342 def rawsize(self, rev):
343 343 """return the length of the uncompressed text for a given revision"""
344 344 l = self.index[rev][2]
345 345 if l >= 0:
346 346 return l
347 347
348 348 t = self.revision(self.node(rev))
349 349 return len(t)
350 350 size = rawsize
351 351
352 352 def ancestors(self, revs, stoprev=0, inclusive=False):
353 353 """Generate the ancestors of 'revs' in reverse topological order.
354 354 Does not generate revs lower than stoprev.
355 355
356 356 See the documentation for ancestor.lazyancestors for more details."""
357 357
358 358 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
359 359 inclusive=inclusive)
360 360
361 361 def descendants(self, revs):
362 362 """Generate the descendants of 'revs' in revision order.
363 363
364 364 Yield a sequence of revision numbers starting with a child of
365 365 some rev in revs, i.e., each revision is *not* considered a
366 366 descendant of itself. Results are ordered by revision number (a
367 367 topological sort)."""
368 368 first = min(revs)
369 369 if first == nullrev:
370 370 for i in self:
371 371 yield i
372 372 return
373 373
374 374 seen = set(revs)
375 375 for i in self.revs(start=first + 1):
376 376 for x in self.parentrevs(i):
377 377 if x != nullrev and x in seen:
378 378 seen.add(i)
379 379 yield i
380 380 break
381 381
382 382 def findcommonmissing(self, common=None, heads=None):
383 383 """Return a tuple of the ancestors of common and the ancestors of heads
384 384 that are not ancestors of common. In revset terminology, we return the
385 385 tuple:
386 386
387 387 ::common, (::heads) - (::common)
388 388
389 389 The list is sorted by revision number, meaning it is
390 390 topologically sorted.
391 391
392 392 'heads' and 'common' are both lists of node IDs. If heads is
393 393 not supplied, uses all of the revlog's heads. If common is not
394 394 supplied, uses nullid."""
395 395 if common is None:
396 396 common = [nullid]
397 397 if heads is None:
398 398 heads = self.heads()
399 399
400 400 common = [self.rev(n) for n in common]
401 401 heads = [self.rev(n) for n in heads]
402 402
403 403 # we want the ancestors, but inclusive
404 404 class lazyset(object):
405 405 def __init__(self, lazyvalues):
406 406 self.addedvalues = set()
407 407 self.lazyvalues = lazyvalues
408 408
409 409 def __contains__(self, value):
410 410 return value in self.addedvalues or value in self.lazyvalues
411 411
412 412 def __iter__(self):
413 413 added = self.addedvalues
414 414 for r in added:
415 415 yield r
416 416 for r in self.lazyvalues:
417 417 if not r in added:
418 418 yield r
419 419
420 420 def add(self, value):
421 421 self.addedvalues.add(value)
422 422
423 423 def update(self, values):
424 424 self.addedvalues.update(values)
425 425
426 426 has = lazyset(self.ancestors(common))
427 427 has.add(nullrev)
428 428 has.update(common)
429 429
430 430 # take all ancestors from heads that aren't in has
431 431 missing = set()
432 432 visit = util.deque(r for r in heads if r not in has)
433 433 while visit:
434 434 r = visit.popleft()
435 435 if r in missing:
436 436 continue
437 437 else:
438 438 missing.add(r)
439 439 for p in self.parentrevs(r):
440 440 if p not in has:
441 441 visit.append(p)
442 442 missing = list(missing)
443 443 missing.sort()
444 444 return has, [self.node(r) for r in missing]
445 445
446 446 def findmissingrevs(self, common=None, heads=None):
447 447 """Return the revision numbers of the ancestors of heads that
448 448 are not ancestors of common.
449 449
450 450 More specifically, return a list of revision numbers corresponding to
451 451 nodes N such that every N satisfies the following constraints:
452 452
453 453 1. N is an ancestor of some node in 'heads'
454 454 2. N is not an ancestor of any node in 'common'
455 455
456 456 The list is sorted by revision number, meaning it is
457 457 topologically sorted.
458 458
459 459 'heads' and 'common' are both lists of revision numbers. If heads is
460 460 not supplied, uses all of the revlog's heads. If common is not
461 461 supplied, uses nullid."""
462 462 if common is None:
463 463 common = [nullrev]
464 464 if heads is None:
465 465 heads = self.headrevs()
466 466
467 467 return ancestor.missingancestors(heads, common, self.parentrevs)
468 468
469 469 def findmissing(self, common=None, heads=None):
470 470 """Return the ancestors of heads that are not ancestors of common.
471 471
472 472 More specifically, return a list of nodes N such that every N
473 473 satisfies the following constraints:
474 474
475 475 1. N is an ancestor of some node in 'heads'
476 476 2. N is not an ancestor of any node in 'common'
477 477
478 478 The list is sorted by revision number, meaning it is
479 479 topologically sorted.
480 480
481 481 'heads' and 'common' are both lists of node IDs. If heads is
482 482 not supplied, uses all of the revlog's heads. If common is not
483 483 supplied, uses nullid."""
484 484 if common is None:
485 485 common = [nullid]
486 486 if heads is None:
487 487 heads = self.heads()
488 488
489 489 common = [self.rev(n) for n in common]
490 490 heads = [self.rev(n) for n in heads]
491 491
492 492 return [self.node(r) for r in
493 493 ancestor.missingancestors(heads, common, self.parentrevs)]
494 494
495 495 def nodesbetween(self, roots=None, heads=None):
496 496 """Return a topological path from 'roots' to 'heads'.
497 497
498 498 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
499 499 topologically sorted list of all nodes N that satisfy both of
500 500 these constraints:
501 501
502 502 1. N is a descendant of some node in 'roots'
503 503 2. N is an ancestor of some node in 'heads'
504 504
505 505 Every node is considered to be both a descendant and an ancestor
506 506 of itself, so every reachable node in 'roots' and 'heads' will be
507 507 included in 'nodes'.
508 508
509 509 'outroots' is the list of reachable nodes in 'roots', i.e., the
510 510 subset of 'roots' that is returned in 'nodes'. Likewise,
511 511 'outheads' is the subset of 'heads' that is also in 'nodes'.
512 512
513 513 'roots' and 'heads' are both lists of node IDs. If 'roots' is
514 514 unspecified, uses nullid as the only root. If 'heads' is
515 515 unspecified, uses list of all of the revlog's heads."""
516 516 nonodes = ([], [], [])
517 517 if roots is not None:
518 518 roots = list(roots)
519 519 if not roots:
520 520 return nonodes
521 521 lowestrev = min([self.rev(n) for n in roots])
522 522 else:
523 523 roots = [nullid] # Everybody's a descendant of nullid
524 524 lowestrev = nullrev
525 525 if (lowestrev == nullrev) and (heads is None):
526 526 # We want _all_ the nodes!
527 527 return ([self.node(r) for r in self], [nullid], list(self.heads()))
528 528 if heads is None:
529 529 # All nodes are ancestors, so the latest ancestor is the last
530 530 # node.
531 531 highestrev = len(self) - 1
532 532 # Set ancestors to None to signal that every node is an ancestor.
533 533 ancestors = None
534 534 # Set heads to an empty dictionary for later discovery of heads
535 535 heads = {}
536 536 else:
537 537 heads = list(heads)
538 538 if not heads:
539 539 return nonodes
540 540 ancestors = set()
541 541 # Turn heads into a dictionary so we can remove 'fake' heads.
542 542 # Also, later we will be using it to filter out the heads we can't
543 543 # find from roots.
544 544 heads = dict.fromkeys(heads, False)
545 545 # Start at the top and keep marking parents until we're done.
546 546 nodestotag = set(heads)
547 547 # Remember where the top was so we can use it as a limit later.
548 548 highestrev = max([self.rev(n) for n in nodestotag])
549 549 while nodestotag:
550 550 # grab a node to tag
551 551 n = nodestotag.pop()
552 552 # Never tag nullid
553 553 if n == nullid:
554 554 continue
555 555 # A node's revision number represents its place in a
556 556 # topologically sorted list of nodes.
557 557 r = self.rev(n)
558 558 if r >= lowestrev:
559 559 if n not in ancestors:
560 560 # If we are possibly a descendant of one of the roots
561 561 # and we haven't already been marked as an ancestor
562 562 ancestors.add(n) # Mark as ancestor
563 563 # Add non-nullid parents to list of nodes to tag.
564 564 nodestotag.update([p for p in self.parents(n) if
565 565 p != nullid])
566 566 elif n in heads: # We've seen it before, is it a fake head?
567 567 # So it is, real heads should not be the ancestors of
568 568 # any other heads.
569 569 heads.pop(n)
570 570 if not ancestors:
571 571 return nonodes
572 572 # Now that we have our set of ancestors, we want to remove any
573 573 # roots that are not ancestors.
574 574
575 575 # If one of the roots was nullid, everything is included anyway.
576 576 if lowestrev > nullrev:
577 577 # But, since we weren't, let's recompute the lowest rev to not
578 578 # include roots that aren't ancestors.
579 579
580 580 # Filter out roots that aren't ancestors of heads
581 581 roots = [n for n in roots if n in ancestors]
582 582 # Recompute the lowest revision
583 583 if roots:
584 584 lowestrev = min([self.rev(n) for n in roots])
585 585 else:
586 586 # No more roots? Return empty list
587 587 return nonodes
588 588 else:
589 589 # We are descending from nullid, and don't need to care about
590 590 # any other roots.
591 591 lowestrev = nullrev
592 592 roots = [nullid]
593 593 # Transform our roots list into a set.
594 594 descendants = set(roots)
595 595 # Also, keep the original roots so we can filter out roots that aren't
596 596 # 'real' roots (i.e. are descended from other roots).
597 597 roots = descendants.copy()
598 598 # Our topologically sorted list of output nodes.
599 599 orderedout = []
600 600 # Don't start at nullid since we don't want nullid in our output list,
601 601 # and if nullid shows up in descendants, empty parents will look like
602 602 # they're descendants.
603 603 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
604 604 n = self.node(r)
605 605 isdescendant = False
606 606 if lowestrev == nullrev: # Everybody is a descendant of nullid
607 607 isdescendant = True
608 608 elif n in descendants:
609 609 # n is already a descendant
610 610 isdescendant = True
611 611 # This check only needs to be done here because all the roots
612 612 # will start being marked as descendants before the loop.
613 613 if n in roots:
614 614 # If n was a root, check if it's a 'real' root.
615 615 p = tuple(self.parents(n))
616 616 # If any of its parents are descendants, it's not a root.
617 617 if (p[0] in descendants) or (p[1] in descendants):
618 618 roots.remove(n)
619 619 else:
620 620 p = tuple(self.parents(n))
621 621 # A node is a descendant if either of its parents are
622 622 # descendants. (We seeded the descendants set with the roots
623 623 # up there, remember?)
624 624 if (p[0] in descendants) or (p[1] in descendants):
625 625 descendants.add(n)
626 626 isdescendant = True
627 627 if isdescendant and ((ancestors is None) or (n in ancestors)):
628 628 # Only include nodes that are both descendants and ancestors.
629 629 orderedout.append(n)
630 630 if (ancestors is not None) and (n in heads):
631 631 # We're trying to figure out which heads are reachable
632 632 # from roots.
633 633 # Mark this head as having been reached
634 634 heads[n] = True
635 635 elif ancestors is None:
636 636 # Otherwise, we're trying to discover the heads.
637 637 # Assume this is a head because if it isn't, the next step
638 638 # will eventually remove it.
639 639 heads[n] = True
640 640 # But, obviously its parents aren't.
641 641 for p in self.parents(n):
642 642 heads.pop(p, None)
643 643 heads = [n for n, flag in heads.iteritems() if flag]
644 644 roots = list(roots)
645 645 assert orderedout
646 646 assert roots
647 647 assert heads
648 648 return (orderedout, roots, heads)
649 649
650 650 def headrevs(self):
651 651 try:
652 652 return self.index.headrevs()
653 653 except AttributeError:
654 654 return self._headrevs()
655 655
656 656 def _headrevs(self):
657 657 count = len(self)
658 658 if not count:
659 659 return [nullrev]
660 660 # we won't iterate over filtered revs, so nobody is a head at start
661 661 ishead = [0] * (count + 1)
662 662 index = self.index
663 663 for r in self:
664 664 ishead[r] = 1 # I may be a head
665 665 e = index[r]
666 666 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
667 667 return [r for r, val in enumerate(ishead) if val]
668 668
669 669 def heads(self, start=None, stop=None):
670 670 """return the list of all nodes that have no children
671 671
672 672 if start is specified, only heads that are descendants of
673 673 start will be returned
674 674 if stop is specified, it will consider all the revs from stop
675 675 as if they had no children
676 676 """
677 677 if start is None and stop is None:
678 678 if not len(self):
679 679 return [nullid]
680 680 return [self.node(r) for r in self.headrevs()]
681 681
682 682 if start is None:
683 683 start = nullid
684 684 if stop is None:
685 685 stop = []
686 686 stoprevs = set([self.rev(n) for n in stop])
687 687 startrev = self.rev(start)
688 688 reachable = set((startrev,))
689 689 heads = set((startrev,))
690 690
691 691 parentrevs = self.parentrevs
692 692 for r in self.revs(start=startrev + 1):
693 693 for p in parentrevs(r):
694 694 if p in reachable:
695 695 if r not in stoprevs:
696 696 reachable.add(r)
697 697 heads.add(r)
698 698 if p in heads and p not in stoprevs:
699 699 heads.remove(p)
700 700
701 701 return [self.node(r) for r in heads]
702 702
703 703 def children(self, node):
704 704 """find the children of a given node"""
705 705 c = []
706 706 p = self.rev(node)
707 707 for r in self.revs(start=p + 1):
708 708 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
709 709 if prevs:
710 710 for pr in prevs:
711 711 if pr == p:
712 712 c.append(self.node(r))
713 713 elif p == nullrev:
714 714 c.append(self.node(r))
715 715 return c
716 716
717 717 def descendant(self, start, end):
718 718 if start == nullrev:
719 719 return True
720 720 for i in self.descendants([start]):
721 721 if i == end:
722 722 return True
723 723 elif i > end:
724 724 break
725 725 return False
726 726
727 727 def ancestor(self, a, b):
728 728 """calculate the least common ancestor of nodes a and b"""
729 729
730 730 a, b = self.rev(a), self.rev(b)
731 731 try:
732 732 ancs = self.index.ancestors(a, b)
733 733 except (AttributeError, OverflowError):
734 734 ancs = ancestor.ancestors(self.parentrevs, a, b)
735 735 if ancs:
736 736 # choose a consistent winner when there's a tie
737 737 return min(map(self.node, ancs))
738 738 return nullid
739 739
740 740 def _match(self, id):
741 741 if isinstance(id, int):
742 742 # rev
743 743 return self.node(id)
744 744 if len(id) == 20:
745 745 # possibly a binary node
746 746 # odds of a binary node being all hex in ASCII are 1 in 10**25
747 747 try:
748 748 node = id
749 749 self.rev(node) # quick search the index
750 750 return node
751 751 except LookupError:
752 752 pass # may be partial hex id
753 753 try:
754 754 # str(rev)
755 755 rev = int(id)
756 756 if str(rev) != id:
757 757 raise ValueError
758 758 if rev < 0:
759 759 rev = len(self) + rev
760 760 if rev < 0 or rev >= len(self):
761 761 raise ValueError
762 762 return self.node(rev)
763 763 except (ValueError, OverflowError):
764 764 pass
765 765 if len(id) == 40:
766 766 try:
767 767 # a full hex nodeid?
768 768 node = bin(id)
769 769 self.rev(node)
770 770 return node
771 771 except (TypeError, LookupError):
772 772 pass
773 773
774 774 def _partialmatch(self, id):
775 775 try:
776 776 n = self.index.partialmatch(id)
777 777 if n and self.hasnode(n):
778 778 return n
779 779 return None
780 780 except RevlogError:
781 781 # parsers.c radix tree lookup gave multiple matches
782 782 # fall through to slow path that filters hidden revisions
783 783 pass
784 784 except (AttributeError, ValueError):
785 785 # we are pure python, or key was too short to search radix tree
786 786 pass
787 787
788 788 if id in self._pcache:
789 789 return self._pcache[id]
790 790
791 791 if len(id) < 40:
792 792 try:
793 793 # hex(node)[:...]
794 794 l = len(id) // 2 # grab an even number of digits
795 795 prefix = bin(id[:l * 2])
796 796 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
797 797 nl = [n for n in nl if hex(n).startswith(id) and
798 798 self.hasnode(n)]
799 799 if len(nl) > 0:
800 800 if len(nl) == 1:
801 801 self._pcache[id] = nl[0]
802 802 return nl[0]
803 803 raise LookupError(id, self.indexfile,
804 804 _('ambiguous identifier'))
805 805 return None
806 806 except TypeError:
807 807 pass
808 808
809 809 def lookup(self, id):
810 810 """locate a node based on:
811 811 - revision number or str(revision number)
812 812 - nodeid or subset of hex nodeid
813 813 """
814 814 n = self._match(id)
815 815 if n is not None:
816 816 return n
817 817 n = self._partialmatch(id)
818 818 if n:
819 819 return n
820 820
821 821 raise LookupError(id, self.indexfile, _('no match found'))
822 822
823 823 def cmp(self, node, text):
824 824 """compare text with a given file revision
825 825
826 826 returns True if text is different than what is stored.
827 827 """
828 828 p1, p2 = self.parents(node)
829 829 return hash(text, p1, p2) != node
830 830
831 831 def _addchunk(self, offset, data):
832 832 o, d = self._chunkcache
833 833 # try to add to existing cache
834 834 if o + len(d) == offset and len(d) + len(data) < _chunksize:
835 835 self._chunkcache = o, d + data
836 836 else:
837 837 self._chunkcache = offset, data
838 838
839 839 def _loadchunk(self, offset, length):
840 840 if self._inline:
841 841 df = self.opener(self.indexfile)
842 842 else:
843 843 df = self.opener(self.datafile)
844 844
845 845 readahead = max(65536, length)
846 846 df.seek(offset)
847 847 d = df.read(readahead)
848 848 df.close()
849 849 self._addchunk(offset, d)
850 850 if readahead > length:
851 851 return util.buffer(d, 0, length)
852 852 return d
853 853
854 854 def _getchunk(self, offset, length):
855 855 o, d = self._chunkcache
856 856 l = len(d)
857 857
858 858 # is it in the cache?
859 859 cachestart = offset - o
860 860 cacheend = cachestart + length
861 861 if cachestart >= 0 and cacheend <= l:
862 862 if cachestart == 0 and cacheend == l:
863 863 return d # avoid a copy
864 864 return util.buffer(d, cachestart, cacheend - cachestart)
865 865
866 866 return self._loadchunk(offset, length)
867 867
868 868 def _chunkraw(self, startrev, endrev):
869 869 start = self.start(startrev)
870 870 end = self.end(endrev)
871 871 if self._inline:
872 872 start += (startrev + 1) * self._io.size
873 873 end += (endrev + 1) * self._io.size
874 874 length = end - start
875 875 return self._getchunk(start, length)
876 876
877 877 def _chunk(self, rev):
878 878 return decompress(self._chunkraw(rev, rev))
879 879
880 880 def _chunks(self, revs):
881 881 '''faster version of [self._chunk(rev) for rev in revs]
882 882
883 883 Assumes that revs is in ascending order.'''
884 884 if not revs:
885 885 return []
886 886 start = self.start
887 887 length = self.length
888 888 inline = self._inline
889 889 iosize = self._io.size
890 890 buffer = util.buffer
891 891
892 892 l = []
893 893 ladd = l.append
894 894
895 895 # preload the cache
896 896 self._chunkraw(revs[0], revs[-1])
897 897 offset, data = self._chunkcache
898 898
899 899 for rev in revs:
900 900 chunkstart = start(rev)
901 901 if inline:
902 902 chunkstart += (rev + 1) * iosize
903 903 chunklength = length(rev)
904 904 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
905 905
906 906 return l
907 907
908 908 def _chunkclear(self):
909 909 self._chunkcache = (0, '')
910 910
911 911 def deltaparent(self, rev):
912 912 """return deltaparent of the given revision"""
913 913 base = self.index[rev][3]
914 914 if base == rev:
915 915 return nullrev
916 916 elif self._generaldelta:
917 917 return base
918 918 else:
919 919 return rev - 1
920 920
921 921 def revdiff(self, rev1, rev2):
922 922 """return or calculate a delta between two revisions"""
923 923 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
924 924 return str(self._chunk(rev2))
925 925
926 926 return mdiff.textdiff(self.revision(rev1),
927 927 self.revision(rev2))
928 928
929 929 def revision(self, nodeorrev):
930 930 """return an uncompressed revision of a given node or revision
931 931 number.
932 932 """
933 933 if isinstance(nodeorrev, int):
934 934 rev = nodeorrev
935 935 node = self.node(rev)
936 936 else:
937 937 node = nodeorrev
938 938 rev = None
939 939
940 940 cachedrev = None
941 941 if node == nullid:
942 942 return ""
943 943 if self._cache:
944 944 if self._cache[0] == node:
945 945 return self._cache[2]
946 946 cachedrev = self._cache[1]
947 947
948 948 # look up what we need to read
949 949 text = None
950 950 if rev is None:
951 951 rev = self.rev(node)
952 952
953 953 # check rev flags
954 954 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
955 955 raise RevlogError(_('incompatible revision flag %x') %
956 956 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
957 957
958 958 # build delta chain
959 959 chain = []
960 960 index = self.index # for performance
961 961 generaldelta = self._generaldelta
962 962 iterrev = rev
963 963 e = index[iterrev]
964 964 while iterrev != e[3] and iterrev != cachedrev:
965 965 chain.append(iterrev)
966 966 if generaldelta:
967 967 iterrev = e[3]
968 968 else:
969 969 iterrev -= 1
970 970 e = index[iterrev]
971 971
972 972 if iterrev == cachedrev:
973 973 # cache hit
974 974 text = self._cache[2]
975 975 else:
976 976 chain.append(iterrev)
977 977 chain.reverse()
978 978
979 979 # drop cache to save memory
980 980 self._cache = None
981 981
982 982 bins = self._chunks(chain)
983 983 if text is None:
984 984 text = str(bins[0])
985 985 bins = bins[1:]
986 986
987 987 text = mdiff.patches(text, bins)
988 988
989 989 text = self._checkhash(text, node, rev)
990 990
991 991 self._cache = (node, rev, text)
992 992 return text
993 993
994 994 def _checkhash(self, text, node, rev):
995 995 p1, p2 = self.parents(node)
996 996 self.checkhash(text, p1, p2, node, rev)
997 997 return text
998 998
999 999 def checkhash(self, text, p1, p2, node, rev=None):
1000 1000 if node != hash(text, p1, p2):
1001 1001 revornode = rev
1002 1002 if revornode is None:
1003 1003 revornode = templatefilters.short(hex(node))
1004 1004 raise RevlogError(_("integrity check failed on %s:%s")
1005 1005 % (self.indexfile, revornode))
1006 1006
1007 1007 def checkinlinesize(self, tr, fp=None):
1008 1008 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1009 1009 return
1010 1010
1011 1011 trinfo = tr.find(self.indexfile)
1012 1012 if trinfo is None:
1013 1013 raise RevlogError(_("%s not found in the transaction")
1014 1014 % self.indexfile)
1015 1015
1016 1016 trindex = trinfo[2]
1017 1017 dataoff = self.start(trindex)
1018 1018
1019 1019 tr.add(self.datafile, dataoff)
1020 1020
1021 1021 if fp:
1022 1022 fp.flush()
1023 1023 fp.close()
1024 1024
1025 1025 df = self.opener(self.datafile, 'w')
1026 1026 try:
1027 1027 for r in self:
1028 1028 df.write(self._chunkraw(r, r))
1029 1029 finally:
1030 1030 df.close()
1031 1031
1032 1032 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1033 1033 self.version &= ~(REVLOGNGINLINEDATA)
1034 1034 self._inline = False
1035 1035 for i in self:
1036 1036 e = self._io.packentry(self.index[i], self.node, self.version, i)
1037 1037 fp.write(e)
1038 1038
1039 1039 # if we don't call close, the temp file will never replace the
1040 1040 # real index
1041 1041 fp.close()
1042 1042
1043 1043 tr.replace(self.indexfile, trindex * self._io.size)
1044 1044 self._chunkclear()
1045 1045
1046 1046 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1047 1047 node=None):
1048 1048 """add a revision to the log
1049 1049
1050 1050 text - the revision data to add
1051 1051 transaction - the transaction object used for rollback
1052 1052 link - the linkrev data to add
1053 1053 p1, p2 - the parent nodeids of the revision
1054 1054 cachedelta - an optional precomputed delta
1055 1055 node - nodeid of revision; typically node is not specified, and it is
1056 1056 computed by default as hash(text, p1, p2), however subclasses might
1057 1057 use different hashing method (and override checkhash() in such case)
1058 1058 """
1059 1059 if link == nullrev:
1060 1060 raise RevlogError(_("attempted to add linkrev -1 to %s")
1061 1061 % self.indexfile)
1062 1062 node = node or hash(text, p1, p2)
1063 1063 if node in self.nodemap:
1064 1064 return node
1065 1065
1066 1066 dfh = None
1067 1067 if not self._inline:
1068 1068 dfh = self.opener(self.datafile, "a")
1069 1069 ifh = self.opener(self.indexfile, "a+")
1070 1070 try:
1071 1071 return self._addrevision(node, text, transaction, link, p1, p2,
1072 1072 cachedelta, ifh, dfh)
1073 1073 finally:
1074 1074 if dfh:
1075 1075 dfh.close()
1076 1076 ifh.close()
1077 1077
1078 1078 def compress(self, text):
1079 1079 """ generate a possibly-compressed representation of text """
1080 1080 if not text:
1081 1081 return ("", text)
1082 1082 l = len(text)
1083 1083 bin = None
1084 1084 if l < 44:
1085 1085 pass
1086 1086 elif l > 1000000:
1087 1087 # zlib makes an internal copy, thus doubling memory usage for
1088 1088 # large files, so let's do this in pieces
1089 1089 z = zlib.compressobj()
1090 1090 p = []
1091 1091 pos = 0
1092 1092 while pos < l:
1093 1093 pos2 = pos + 2**20
1094 1094 p.append(z.compress(text[pos:pos2]))
1095 1095 pos = pos2
1096 1096 p.append(z.flush())
1097 1097 if sum(map(len, p)) < l:
1098 1098 bin = "".join(p)
1099 1099 else:
1100 1100 bin = _compress(text)
1101 1101 if bin is None or len(bin) > l:
1102 1102 if text[0] == '\0':
1103 1103 return ("", text)
1104 1104 return ('u', text)
1105 1105 return ("", bin)
1106 1106
1107 1107 def _addrevision(self, node, text, transaction, link, p1, p2,
1108 1108 cachedelta, ifh, dfh):
1109 1109 """internal function to add revisions to the log
1110 1110
1111 1111 see addrevision for argument descriptions.
1112 1112 invariants:
1113 1113 - text is optional (can be None); if not set, cachedelta must be set.
1114 1114 if both are set, they must correspond to each other.
1115 1115 """
1116 1116 btext = [text]
1117 1117 def buildtext():
1118 1118 if btext[0] is not None:
1119 1119 return btext[0]
1120 1120 # flush any pending writes here so we can read it in revision
1121 1121 if dfh:
1122 1122 dfh.flush()
1123 1123 ifh.flush()
1124 1124 basetext = self.revision(self.node(cachedelta[0]))
1125 1125 btext[0] = mdiff.patch(basetext, cachedelta[1])
1126 1126 self.checkhash(btext[0], p1, p2, node)
1127 1127 return btext[0]
1128 1128
1129 1129 def builddelta(rev):
1130 1130 # can we use the cached delta?
1131 1131 if cachedelta and cachedelta[0] == rev:
1132 1132 delta = cachedelta[1]
1133 1133 else:
1134 1134 t = buildtext()
1135 1135 ptext = self.revision(self.node(rev))
1136 1136 delta = mdiff.textdiff(ptext, t)
1137 1137 data = self.compress(delta)
1138 1138 l = len(data[1]) + len(data[0])
1139 1139 if basecache[0] == rev:
1140 1140 chainbase = basecache[1]
1141 1141 else:
1142 1142 chainbase = self.chainbase(rev)
1143 1143 dist = l + offset - self.start(chainbase)
1144 1144 if self._generaldelta:
1145 1145 base = rev
1146 1146 else:
1147 1147 base = chainbase
1148 1148 return dist, l, data, base, chainbase
1149 1149
1150 1150 curr = len(self)
1151 1151 prev = curr - 1
1152 1152 base = chainbase = curr
1153 1153 offset = self.end(prev)
1154 1154 flags = 0
1155 1155 d = None
1156 1156 if self._basecache is None:
1157 1157 self._basecache = (prev, self.chainbase(prev))
1158 1158 basecache = self._basecache
1159 1159 p1r, p2r = self.rev(p1), self.rev(p2)
1160 1160
1161 1161 # should we try to build a delta?
1162 1162 if prev != nullrev:
1163 1163 if self._generaldelta:
1164 1164 if p1r >= basecache[1]:
1165 1165 d = builddelta(p1r)
1166 1166 elif p2r >= basecache[1]:
1167 1167 d = builddelta(p2r)
1168 1168 else:
1169 1169 d = builddelta(prev)
1170 1170 else:
1171 1171 d = builddelta(prev)
1172 1172 dist, l, data, base, chainbase = d
1173 1173
1174 1174 # full versions are inserted when the needed deltas
1175 1175 # become comparable to the uncompressed text
1176 1176 if text is None:
1177 1177 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1178 1178 cachedelta[1])
1179 1179 else:
1180 1180 textlen = len(text)
1181 1181 if d is None or dist > textlen * 2:
1182 1182 text = buildtext()
1183 1183 data = self.compress(text)
1184 1184 l = len(data[1]) + len(data[0])
1185 1185 base = chainbase = curr
1186 1186
1187 1187 e = (offset_type(offset, flags), l, textlen,
1188 1188 base, link, p1r, p2r, node)
1189 1189 self.index.insert(-1, e)
1190 1190 self.nodemap[node] = curr
1191 1191
1192 1192 entry = self._io.packentry(e, self.node, self.version, curr)
1193 1193 if not self._inline:
1194 1194 transaction.add(self.datafile, offset)
1195 1195 transaction.add(self.indexfile, curr * len(entry))
1196 1196 if data[0]:
1197 1197 dfh.write(data[0])
1198 1198 dfh.write(data[1])
1199 1199 dfh.flush()
1200 1200 ifh.write(entry)
1201 1201 else:
1202 1202 offset += curr * self._io.size
1203 1203 transaction.add(self.indexfile, offset, curr)
1204 1204 ifh.write(entry)
1205 1205 ifh.write(data[0])
1206 1206 ifh.write(data[1])
1207 1207 self.checkinlinesize(transaction, ifh)
1208 1208
1209 1209 if type(text) == str: # only accept immutable objects
1210 1210 self._cache = (node, curr, text)
1211 1211 self._basecache = (curr, chainbase)
1212 1212 return node
1213 1213
1214 1214 def addgroup(self, bundle, linkmapper, transaction):
1215 1215 """
1216 1216 add a delta group
1217 1217
1218 1218 given a set of deltas, add them to the revision log. the
1219 1219 first delta is against its parent, which should be in our
1220 1220 log, the rest are against the previous delta.
1221 1221 """
1222 1222
1223 1223 # track the base of the current delta log
1224 1224 content = []
1225 1225 node = None
1226 1226
1227 1227 r = len(self)
1228 1228 end = 0
1229 1229 if r:
1230 1230 end = self.end(r - 1)
1231 1231 ifh = self.opener(self.indexfile, "a+")
1232 1232 isize = r * self._io.size
1233 1233 if self._inline:
1234 1234 transaction.add(self.indexfile, end + isize, r)
1235 1235 dfh = None
1236 1236 else:
1237 1237 transaction.add(self.indexfile, isize, r)
1238 1238 transaction.add(self.datafile, end)
1239 1239 dfh = self.opener(self.datafile, "a")
1240 1240
1241 1241 try:
1242 1242 # loop through our set of deltas
1243 1243 chain = None
1244 1244 while True:
1245 1245 chunkdata = bundle.deltachunk(chain)
1246 1246 if not chunkdata:
1247 1247 break
1248 1248 node = chunkdata['node']
1249 1249 p1 = chunkdata['p1']
1250 1250 p2 = chunkdata['p2']
1251 1251 cs = chunkdata['cs']
1252 1252 deltabase = chunkdata['deltabase']
1253 1253 delta = chunkdata['delta']
1254 1254
1255 1255 content.append(node)
1256 1256
1257 1257 link = linkmapper(cs)
1258 1258 if node in self.nodemap:
1259 1259 # this can happen if two branches make the same change
1260 1260 chain = node
1261 1261 continue
1262 1262
1263 1263 for p in (p1, p2):
1264 1264 if p not in self.nodemap:
1265 1265 raise LookupError(p, self.indexfile,
1266 1266 _('unknown parent'))
1267 1267
1268 1268 if deltabase not in self.nodemap:
1269 1269 raise LookupError(deltabase, self.indexfile,
1270 1270 _('unknown delta base'))
1271 1271
1272 1272 baserev = self.rev(deltabase)
1273 1273 chain = self._addrevision(node, None, transaction, link,
1274 1274 p1, p2, (baserev, delta), ifh, dfh)
1275 1275 if not dfh and not self._inline:
1276 1276 # addrevision switched from inline to conventional
1277 1277 # reopen the index
1278 1278 ifh.close()
1279 1279 dfh = self.opener(self.datafile, "a")
1280 1280 ifh = self.opener(self.indexfile, "a")
1281 1281 finally:
1282 1282 if dfh:
1283 1283 dfh.close()
1284 1284 ifh.close()
1285 1285
1286 1286 return content
1287 1287
1288 def getstrippoint(self, minlink):
1289 """find the minimum rev that must be stripped to strip the linkrev
1290
1291 Returns a tuple containing the minimum rev and a set of all revs that
1292 have linkrevs that will be broken by this strip.
1293 """
1294 brokenrevs = set()
1295 strippoint = len(self)
1296
1297 heads = {}
1298 futurelargelinkrevs = set()
1299 for head in self.headrevs():
1300 headlinkrev = self.linkrev(head)
1301 heads[head] = headlinkrev
1302 if headlinkrev >= minlink:
1303 futurelargelinkrevs.add(headlinkrev)
1304
1305 # This algorithm involves walking down the rev graph, starting at the
1306 # heads. Since the revs are topologically sorted according to linkrev,
1307 # once all head linkrevs are below the minlink, we know there are
1308 # no more revs that could have a linkrev greater than minlink.
1309 # So we can stop walking.
1310 while futurelargelinkrevs:
1311 strippoint -= 1
1312 linkrev = heads.pop(strippoint)
1313
1314 if linkrev < minlink:
1315 brokenrevs.add(strippoint)
1316 else:
1317 futurelargelinkrevs.remove(linkrev)
1318
1319 for p in self.parentrevs(strippoint):
1320 if p != nullrev:
1321 plinkrev = self.linkrev(p)
1322 heads[p] = plinkrev
1323 if plinkrev >= minlink:
1324 futurelargelinkrevs.add(plinkrev)
1325
1326 return strippoint, brokenrevs
1327
1288 1328 def strip(self, minlink, transaction):
1289 1329 """truncate the revlog on the first revision with a linkrev >= minlink
1290 1330
1291 1331 This function is called when we're stripping revision minlink and
1292 1332 its descendants from the repository.
1293 1333
1294 1334 We have to remove all revisions with linkrev >= minlink, because
1295 1335 the equivalent changelog revisions will be renumbered after the
1296 1336 strip.
1297 1337
1298 1338 So we truncate the revlog on the first of these revisions, and
1299 1339 trust that the caller has saved the revisions that shouldn't be
1300 1340 removed and that it'll re-add them after this truncation.
1301 1341 """
1302 1342 if len(self) == 0:
1303 1343 return
1304 1344
1305 for rev in self:
1306 if self.index[rev][4] >= minlink:
1307 break
1308 else:
1345 rev, _ = self.getstrippoint(minlink)
1346 if rev == len(self):
1309 1347 return
1310 1348
1311 1349 # first truncate the files on disk
1312 1350 end = self.start(rev)
1313 1351 if not self._inline:
1314 1352 transaction.add(self.datafile, end)
1315 1353 end = rev * self._io.size
1316 1354 else:
1317 1355 end += rev * self._io.size
1318 1356
1319 1357 transaction.add(self.indexfile, end)
1320 1358
1321 1359 # then reset internal state in memory to forget those revisions
1322 1360 self._cache = None
1323 1361 self._chunkclear()
1324 1362 for x in xrange(rev, len(self)):
1325 1363 del self.nodemap[self.node(x)]
1326 1364
1327 1365 del self.index[rev:-1]
1328 1366
1329 1367 def checksize(self):
1330 1368 expected = 0
1331 1369 if len(self):
1332 1370 expected = max(0, self.end(len(self) - 1))
1333 1371
1334 1372 try:
1335 1373 f = self.opener(self.datafile)
1336 1374 f.seek(0, 2)
1337 1375 actual = f.tell()
1338 1376 f.close()
1339 1377 dd = actual - expected
1340 1378 except IOError, inst:
1341 1379 if inst.errno != errno.ENOENT:
1342 1380 raise
1343 1381 dd = 0
1344 1382
1345 1383 try:
1346 1384 f = self.opener(self.indexfile)
1347 1385 f.seek(0, 2)
1348 1386 actual = f.tell()
1349 1387 f.close()
1350 1388 s = self._io.size
1351 1389 i = max(0, actual // s)
1352 1390 di = actual - (i * s)
1353 1391 if self._inline:
1354 1392 databytes = 0
1355 1393 for r in self:
1356 1394 databytes += max(0, self.length(r))
1357 1395 dd = 0
1358 1396 di = actual - len(self) * s - databytes
1359 1397 except IOError, inst:
1360 1398 if inst.errno != errno.ENOENT:
1361 1399 raise
1362 1400 di = 0
1363 1401
1364 1402 return (dd, di)
1365 1403
1366 1404 def files(self):
1367 1405 res = [self.indexfile]
1368 1406 if not self._inline:
1369 1407 res.append(self.datafile)
1370 1408 return res
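As a sanity check on the semantics of `getstrippoint`, here is a self-contained toy re-expression of the same head-walk over plain lists. The helper names and sample graphs are made up for illustration; the logic mirrors the method added above:

```python
nullrev = -1

def toyheadrevs(parents):
    # heads are revs that appear as nobody's parent
    nonheads = set(p for ps in parents for p in ps)
    return [r for r in range(len(parents)) if r not in nonheads]

def toygetstrippoint(parents, linkrevs, minlink):
    # parents[r] = (p1, p2); linkrevs[r] = changelog rev of rev r.
    # As in a real revlog, linkrevs are assumed monotone along parent
    # chains and unique within the revlog (the set removal relies on it).
    brokenrevs = set()
    strippoint = len(parents)
    heads = {}
    futurelargelinkrevs = set()
    for head in toyheadrevs(parents):
        heads[head] = linkrevs[head]
        if linkrevs[head] >= minlink:
            futurelargelinkrevs.add(linkrevs[head])
    # walk down from the heads; once no pending linkrev is >= minlink,
    # no lower rev can have one either, so the walk stops early
    while futurelargelinkrevs:
        strippoint -= 1
        linkrev = heads.pop(strippoint)
        if linkrev < minlink:
            brokenrevs.add(strippoint)
        else:
            futurelargelinkrevs.remove(linkrev)
        for p in parents[strippoint]:
            if p != nullrev:
                heads[p] = linkrevs[p]
                if linkrevs[p] >= minlink:
                    futurelargelinkrevs.add(linkrevs[p])
    return strippoint, brokenrevs

# linear revlog with linkrevs 0,1,2: stripping linkrev >= 2 truncates
# at rev 2 and breaks nothing
assert toygetstrippoint([(nullrev, nullrev), (0, nullrev), (1, nullrev)],
                        [0, 1, 2], 2) == (2, set())
# two branches off rev 0: rev 1 (linkrev 2) must go, and rev 2
# (linkrev 1) sits above the strip point, so it is reported broken
assert toygetstrippoint([(nullrev, nullrev), (0, nullrev), (0, nullrev)],
                        [0, 2, 1], 2) == (1, set([2]))
```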