revlog: move ancestor generation out to a new class...
Siddharth Agarwal
r18090:9abc55ef default
@@ -1,173 +1,221 b''
1 1 # ancestor.py - generic DAG ancestor algorithm for mercurial
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 import heapq
8 import heapq, util
9 9 from node import nullrev
10 10
11 11 def ancestor(a, b, pfunc):
12 12 """
13 13 Returns the common ancestor of a and b that is furthest from a
14 14 root (as measured by longest path) or None if no ancestor is
15 15 found. If there are multiple common ancestors at the same
16 16 distance, the first one found is returned.
17 17
18 18 pfunc must return a list of parent vertices for a given vertex
19 19 """
20 20
21 21 if a == b:
22 22 return a
23 23
24 24 a, b = sorted([a, b])
25 25
26 26 # find depth from root of all ancestors
27 27 # depth is stored as a negative for heapq
28 28 parentcache = {}
29 29 visit = [a, b]
30 30 depth = {}
31 31 while visit:
32 32 vertex = visit[-1]
33 33 pl = pfunc(vertex)
34 34 parentcache[vertex] = pl
35 35 if not pl:
36 36 depth[vertex] = 0
37 37 visit.pop()
38 38 else:
39 39 for p in pl:
40 40 if p == a or p == b: # did we find a or b as a parent?
41 41 return p # we're done
42 42 if p not in depth:
43 43 visit.append(p)
44 44 if visit[-1] == vertex:
45 45 # -(maximum distance of parents + 1)
46 46 depth[vertex] = min([depth[p] for p in pl]) - 1
47 47 visit.pop()
48 48
49 49 # traverse ancestors in order of decreasing distance from root
50 50 def ancestors(vertex):
51 51 h = [(depth[vertex], vertex)]
52 52 seen = set()
53 53 while h:
54 54 d, n = heapq.heappop(h)
55 55 if n not in seen:
56 56 seen.add(n)
57 57 yield (d, n)
58 58 for p in parentcache[n]:
59 59 heapq.heappush(h, (depth[p], p))
60 60
61 61 def generations(vertex):
62 62 sg, s = None, set()
63 63 for g, v in ancestors(vertex):
64 64 if g != sg:
65 65 if sg:
66 66 yield sg, s
67 67 sg, s = g, set((v,))
68 68 else:
69 69 s.add(v)
70 70 yield sg, s
71 71
72 72 x = generations(a)
73 73 y = generations(b)
74 74 gx = x.next()
75 75 gy = y.next()
76 76
77 77 # increment each ancestor list until it is closer to root than
78 78 # the other, or they match
79 79 try:
80 80 while True:
81 81 if gx[0] == gy[0]:
82 82 for v in gx[1]:
83 83 if v in gy[1]:
84 84 return v
85 85 gy = y.next()
86 86 gx = x.next()
87 87 elif gx[0] > gy[0]:
88 88 gy = y.next()
89 89 else:
90 90 gx = x.next()
91 91 except StopIteration:
92 92 return None
93 93
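A quick sanity check of the function above, as a hedged sketch (the DAG and pfunc here are hypothetical, and the module is assumed importable as mercurial.ancestor at this revision):

    # Small DAG: 0 <- 1 <- 2 and 0 <- 3; vertices 2 and 3 meet at 0.
    from mercurial import ancestor

    parents = {0: [], 1: [0], 2: [1], 3: [0]}
    pfunc = parents.__getitem__   # pfunc is just a dict lookup here

    assert ancestor.ancestor(2, 3, pfunc) == 0
    assert ancestor.ancestor(2, 2, pfunc) == 2  # a == b short-circuits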
94 94 def missingancestors(revs, bases, pfunc):
95 95 """Return all the ancestors of revs that are not ancestors of bases.
96 96
97 97 This may include elements from revs.
98 98
99 99 Equivalent to the revset (::revs - ::bases). Revs are returned in
100 100 revision number order, which is a topological order.
101 101
102 102 revs and bases should both be iterables. pfunc must return a list of
103 103 parent revs for a given rev.
104 104 """
105 105
106 106 revsvisit = set(revs)
107 107 basesvisit = set(bases)
108 108 if not revsvisit:
109 109 return []
110 110 if not basesvisit:
111 111 basesvisit.add(nullrev)
112 112 start = max(max(revsvisit), max(basesvisit))
113 113 bothvisit = revsvisit.intersection(basesvisit)
114 114 revsvisit.difference_update(bothvisit)
115 115 basesvisit.difference_update(bothvisit)
116 116 # At this point, we hold the invariants that:
117 117 # - revsvisit is the set of nodes we know are an ancestor of at least one
118 118 # of the nodes in revs
119 119 # - basesvisit is the same for bases
120 120 # - bothvisit is the set of nodes we know are ancestors of at least one of
121 121 # the nodes in revs and one of the nodes in bases
122 122 # - a node may be in none or one, but not more, of revsvisit, basesvisit
123 123 # and bothvisit at any given time
124 124 # Now we walk down in reverse topo order, adding parents of nodes already
125 125 # visited to the sets while maintaining the invariants. When a node is
126 126 # found in both revsvisit and basesvisit, it is removed from them and
127 127 # added to bothvisit instead. When revsvisit becomes empty, there are no
128 128 # more ancestors of revs that aren't also ancestors of bases, so exit.
129 129
130 130 missing = []
131 131 for curr in xrange(start, nullrev, -1):
132 132 if not revsvisit:
133 133 break
134 134
135 135 if curr in bothvisit:
136 136 bothvisit.remove(curr)
137 137 # curr's parents might have made it into revsvisit or basesvisit
138 138 # through another path
139 139 for p in pfunc(curr):
140 140 revsvisit.discard(p)
141 141 basesvisit.discard(p)
142 142 bothvisit.add(p)
143 143 continue
144 144
145 145 # curr will never be in both revsvisit and basesvisit, since if it
146 146 # were it'd have been pushed to bothvisit
147 147 if curr in revsvisit:
148 148 missing.append(curr)
149 149 thisvisit = revsvisit
150 150 othervisit = basesvisit
151 151 elif curr in basesvisit:
152 152 thisvisit = basesvisit
153 153 othervisit = revsvisit
154 154 else:
155 155 # not an ancestor of revs or bases: ignore
156 156 continue
157 157
158 158 thisvisit.remove(curr)
159 159 for p in pfunc(curr):
160 160 if p == nullrev:
161 161 pass
162 162 elif p in othervisit or p in bothvisit:
163 163 # p is implicitly in thisvisit. This means p is or should be
164 164 # in bothvisit
165 165 revsvisit.discard(p)
166 166 basesvisit.discard(p)
167 167 bothvisit.add(p)
168 168 else:
169 169 # visit later
170 170 thisvisit.add(p)
171 171
172 172 missing.reverse()
173 173 return missing
174
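A hedged sketch of the revset equivalence in the docstring, on a hypothetical dict-backed DAG (again assuming the module imports as mercurial.ancestor):

    # DAG: 0 <- 1 <- 2 <- 4, plus 1 <- 3.
    from mercurial import ancestor

    parents = {0: [], 1: [0], 2: [1], 3: [1], 4: [2]}
    pfunc = parents.__getitem__

    # (::4) - (::3) is {2, 4}, returned in revision-number order.
    assert ancestor.missingancestors([4], [3], pfunc) == [2, 4]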
175 class lazyancestors(object):
176 def __init__(self, cl, revs, stoprev=0, inclusive=False):
177 """Create a new object generating ancestors for the given revs. Does
178 not generate revs lower than stoprev.
179
180 This is computed lazily starting from revs. The object only supports
181 iteration.
182
183 cl should be a changelog and revs should be an iterable. inclusive is
184 a boolean that indicates whether revs should be included. Revs lower
185 than stoprev will not be generated.
186
187 Result does not include the null revision."""
188 self._parentrevs = cl.parentrevs
189 self._initrevs = revs
190 self._stoprev = stoprev
191 self._inclusive = inclusive
192
193 def __iter__(self):
194 """Generate the ancestors of _initrevs in reverse topological order.
195
196 If inclusive is False, yield a sequence of revision numbers starting
197 with the parents of each revision in revs, i.e., each revision is *not*
198 considered an ancestor of itself. Results are in breadth-first order:
199 parents of each rev in revs, then parents of those, etc.
200
201 If inclusive is True, yield all the revs first (ignoring stoprev),
202 then yield all the ancestors of revs as when inclusive is False.
203 If an element in revs is an ancestor of a different rev it is not
204 yielded again."""
205 seen = set()
206 revs = self._initrevs
207 if self._inclusive:
208 for rev in revs:
209 yield rev
210 seen.update(revs)
211
212 parentrevs = self._parentrevs
213 stoprev = self._stoprev
214 visit = util.deque(revs)
215
216 while visit:
217 for parent in parentrevs(visit.popleft()):
218 if parent >= stoprev and parent not in seen:
219 visit.append(parent)
220 seen.add(parent)
221 yield parent
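The new class only needs an object with a parentrevs(rev) method, so a small stand-in is enough to exercise it; a hedged sketch (the stub changelog is hypothetical):

    from mercurial import ancestor

    class stubchangelog(object):
        # rev -> (p1, p2), with -1 standing in for nullrev
        _parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
        def parentrevs(self, rev):
            return self._parents[rev]

    cl = stubchangelog()
    # parents first, then grandparents; 3 itself is excluded...
    assert list(ancestor.lazyancestors(cl, [3])) == [1, 0]
    # ...unless inclusive=True, which yields the initial revs up front
    assert list(ancestor.lazyancestors(cl, [3], inclusive=True)) == [3, 1, 0]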
@@ -1,1358 +1,1337 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
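offset_type and its two accessors pack a byte offset and a 16-bit flags field into one integer; a minimal sketch of the round trip (values are made up):

    q = (1024 << 16) | 3        # offset_type(1024, 3)
    assert (q >> 16) == 1024    # getoffset(q)
    assert (q & 0xFFFF) == 3    # gettype(q)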
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
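Since nullid is twenty zero bytes it always sorts first, so the copy-based fast path above yields the same digest as the general sorted-parents case; a hedged sketch using hashlib directly:

    import hashlib
    nullid = '\0' * 20
    p1 = '\x01' * 20

    # general case: sha1(min(p1, p2) + max(p1, p2) + text)
    general = hashlib.sha1(min(p1, nullid) + max(p1, nullid) + 'text').digest()
    # fast path: start from sha1(nullid), then feed p1 and the text
    fast = hashlib.sha1(nullid + p1 + 'text').digest()
    assert general == fast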
78 78 def decompress(bin):
79 79 """ decompress the given input """
80 80 if not bin:
81 81 return bin
82 82 t = bin[0]
83 83 if t == '\0':
84 84 return bin
85 85 if t == 'x':
86 86 try:
87 87 return _decompress(bin)
88 88 except zlib.error, e:
89 89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 90 if t == 'u':
91 91 return bin[1:]
92 92 raise RevlogError(_("unknown compression type %r") % t)
93 93
94 94 indexformatv0 = ">4l20s20s20s"
95 95 v0shaoffset = 56
96 96
97 97 class revlogoldio(object):
98 98 def __init__(self):
99 99 self.size = struct.calcsize(indexformatv0)
100 100
101 101 def parseindex(self, data, inline):
102 102 s = self.size
103 103 index = []
104 104 nodemap = {nullid: nullrev}
105 105 n = off = 0
106 106 l = len(data)
107 107 while off + s <= l:
108 108 cur = data[off:off + s]
109 109 off += s
110 110 e = _unpack(indexformatv0, cur)
111 111 # transform to revlogv1 format
112 112 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
113 113 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
114 114 index.append(e2)
115 115 nodemap[e[6]] = n
116 116 n += 1
117 117
118 118 # add the magic null revision at -1
119 119 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
120 120
121 121 return index, nodemap, None
122 122
123 123 def packentry(self, entry, node, version, rev):
124 124 if gettype(entry[0]):
125 125 raise RevlogError(_("index entry flags need RevlogNG"))
126 126 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
127 127 node(entry[5]), node(entry[6]), entry[7])
128 128 return _pack(indexformatv0, *e2)
129 129
130 130 # index ng:
131 131 # 6 bytes: offset
132 132 # 2 bytes: flags
133 133 # 4 bytes: compressed length
134 134 # 4 bytes: uncompressed length
135 135 # 4 bytes: base rev
136 136 # 4 bytes: link rev
137 137 # 4 bytes: parent 1 rev
138 138 # 4 bytes: parent 2 rev
139 139 # 32 bytes: nodeid
140 140 indexformatng = ">Qiiiiii20s12x"
141 141 ngshaoffset = 32
142 142 versionformat = ">I"
143 143
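The layout comment above pins each RevlogNG index entry at 64 bytes; a hedged sketch of packing and unpacking one entry with the struct module (the sample values are invented):

    import struct
    fmt = ">Qiiiiii20s12x"      # same layout as indexformatng
    assert struct.calcsize(fmt) == 64

    # (offset<<16 | flags, comp. len, uncomp. len, base, link, p1, p2, node)
    entry = (0, 10, 12, 0, 0, -1, -1, '\x01' * 20)
    assert struct.unpack(fmt, struct.pack(fmt, *entry)) == entry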
144 144 class revlogio(object):
145 145 def __init__(self):
146 146 self.size = struct.calcsize(indexformatng)
147 147
148 148 def parseindex(self, data, inline):
149 149 # call the C implementation to parse the index data
150 150 index, cache = parsers.parse_index2(data, inline)
151 151 return index, getattr(index, 'nodemap', None), cache
152 152
153 153 def packentry(self, entry, node, version, rev):
154 154 p = _pack(indexformatng, *entry)
155 155 if rev == 0:
156 156 p = _pack(versionformat, version) + p[4:]
157 157 return p
158 158
159 159 class revlog(object):
160 160 """
161 161 the underlying revision storage object
162 162
163 163 A revlog consists of two parts, an index and the revision data.
164 164
165 165 The index is a file with a fixed record size containing
166 166 information on each revision, including its nodeid (hash), the
167 167 nodeids of its parents, the position and offset of its data within
168 168 the data file, and the revision it's based on. Finally, each entry
169 169 contains a linkrev entry that can serve as a pointer to external
170 170 data.
171 171
172 172 The revision data itself is a linear collection of data chunks.
173 173 Each chunk represents a revision and is usually represented as a
174 174 delta against the previous chunk. To bound lookup time, runs of
175 175 deltas are limited to about 2 times the length of the original
176 176 version data. This makes retrieval of a version proportional to
177 177 its size, or O(1) relative to the number of revisions.
178 178
179 179 Both pieces of the revlog are written to in an append-only
180 180 fashion, which means we never need to rewrite a file to insert or
181 181 remove data, and can use some simple techniques to avoid the need
182 182 for locking while reading.
183 183 """
184 184 def __init__(self, opener, indexfile):
185 185 """
186 186 create a revlog object
187 187
188 188 opener is a function that abstracts the file opening operation
189 189 and can be used to implement COW semantics or the like.
190 190 """
191 191 self.indexfile = indexfile
192 192 self.datafile = indexfile[:-2] + ".d"
193 193 self.opener = opener
194 194 self._cache = None
195 195 self._basecache = (0, 0)
196 196 self._chunkcache = (0, '')
197 197 self.index = []
198 198 self._pcache = {}
199 199 self._nodecache = {nullid: nullrev}
200 200 self._nodepos = None
201 201
202 202 v = REVLOG_DEFAULT_VERSION
203 203 opts = getattr(opener, 'options', None)
204 204 if opts is not None:
205 205 if 'revlogv1' in opts:
206 206 if 'generaldelta' in opts:
207 207 v |= REVLOGGENERALDELTA
208 208 else:
209 209 v = 0
210 210
211 211 i = ''
212 212 self._initempty = True
213 213 try:
214 214 f = self.opener(self.indexfile)
215 215 i = f.read()
216 216 f.close()
217 217 if len(i) > 0:
218 218 v = struct.unpack(versionformat, i[:4])[0]
219 219 self._initempty = False
220 220 except IOError, inst:
221 221 if inst.errno != errno.ENOENT:
222 222 raise
223 223
224 224 self.version = v
225 225 self._inline = v & REVLOGNGINLINEDATA
226 226 self._generaldelta = v & REVLOGGENERALDELTA
227 227 flags = v & ~0xFFFF
228 228 fmt = v & 0xFFFF
229 229 if fmt == REVLOGV0 and flags:
230 230 raise RevlogError(_("index %s unknown flags %#04x for format v0")
231 231 % (self.indexfile, flags >> 16))
232 232 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
233 233 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
234 234 % (self.indexfile, flags >> 16))
235 235 elif fmt > REVLOGNG:
236 236 raise RevlogError(_("index %s unknown format %d")
237 237 % (self.indexfile, fmt))
238 238
239 239 self._io = revlogio()
240 240 if self.version == REVLOGV0:
241 241 self._io = revlogoldio()
242 242 try:
243 243 d = self._io.parseindex(i, self._inline)
244 244 except (ValueError, IndexError):
245 245 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
246 246 self.index, nodemap, self._chunkcache = d
247 247 if nodemap is not None:
248 248 self.nodemap = self._nodecache = nodemap
249 249 if not self._chunkcache:
250 250 self._chunkclear()
251 251
252 252 def tip(self):
253 253 return self.node(len(self.index) - 2)
254 254 def __len__(self):
255 255 return len(self.index) - 1
256 256 def __iter__(self):
257 257 return iter(xrange(len(self)))
258 258 def revs(self, start=0, stop=None):
259 259 """iterate over all rev in this revlog (from start to stop)"""
260 260 step = 1
261 261 if stop is not None:
262 262 if start > stop:
263 263 step = -1
264 264 stop += step
265 265 else:
266 266 stop = len(self)
267 267 return xrange(start, stop, step)
268 268
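A hedged standalone copy of the stop arithmetic above, to make the inclusive-stop and reverse-iteration cases concrete (the helper name is illustrative only):

    def _revs(length, start=0, stop=None):
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step            # make stop inclusive
        else:
            stop = length
        return xrange(start, stop, step)

    assert list(_revs(5)) == [0, 1, 2, 3, 4]
    assert list(_revs(5, 3, 1)) == [3, 2, 1]    # walks backwards, stop included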
269 269 @util.propertycache
270 270 def nodemap(self):
271 271 self.rev(self.node(0))
272 272 return self._nodecache
273 273
274 274 def hasnode(self, node):
275 275 try:
276 276 self.rev(node)
277 277 return True
278 278 except KeyError:
279 279 return False
280 280
281 281 def clearcaches(self):
282 282 try:
283 283 self._nodecache.clearcaches()
284 284 except AttributeError:
285 285 self._nodecache = {nullid: nullrev}
286 286 self._nodepos = None
287 287
288 288 def rev(self, node):
289 289 try:
290 290 return self._nodecache[node]
291 291 except RevlogError:
292 292 # parsers.c radix tree lookup failed
293 293 raise LookupError(node, self.indexfile, _('no node'))
294 294 except KeyError:
295 295 # pure python cache lookup failed
296 296 n = self._nodecache
297 297 i = self.index
298 298 p = self._nodepos
299 299 if p is None:
300 300 p = len(i) - 2
301 301 for r in xrange(p, -1, -1):
302 302 v = i[r][7]
303 303 n[v] = r
304 304 if v == node:
305 305 self._nodepos = r - 1
306 306 return r
307 307 raise LookupError(node, self.indexfile, _('no node'))
308 308
309 309 def node(self, rev):
310 310 return self.index[rev][7]
311 311 def linkrev(self, rev):
312 312 return self.index[rev][4]
313 313 def parents(self, node):
314 314 i = self.index
315 315 d = i[self.rev(node)]
316 316 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
317 317 def parentrevs(self, rev):
318 318 return self.index[rev][5:7]
319 319 def start(self, rev):
320 320 return int(self.index[rev][0] >> 16)
321 321 def end(self, rev):
322 322 return self.start(rev) + self.length(rev)
323 323 def length(self, rev):
324 324 return self.index[rev][1]
325 325 def chainbase(self, rev):
326 326 index = self.index
327 327 base = index[rev][3]
328 328 while base != rev:
329 329 rev = base
330 330 base = index[rev][3]
331 331 return base
332 332 def flags(self, rev):
333 333 return self.index[rev][0] & 0xFFFF
334 334 def rawsize(self, rev):
335 335 """return the length of the uncompressed text for a given revision"""
336 336 l = self.index[rev][2]
337 337 if l >= 0:
338 338 return l
339 339
340 340 t = self.revision(self.node(rev))
341 341 return len(t)
342 342 size = rawsize
343 343
344 344 def ancestors(self, revs, stoprev=0, inclusive=False):
345 345 """Generate the ancestors of 'revs' in reverse topological order.
346 346 Does not generate revs lower than stoprev.
347 347
348 If inclusive is False, yield a sequence of revision numbers starting
349 with the parents of each revision in revs, i.e., each revision is *not*
350 considered an ancestor of itself. Results are in breadth-first order:
351 parents of each rev in revs, then parents of those, etc.
352
353 If inclusive is True, yield all the revs first (ignoring stoprev),
354 then yield all the ancestors of revs as when inclusive is False.
355 If an element in revs is an ancestor of a different rev it is not
356 yielded again.
348 See the documentation for ancestor.lazyancestors for more details."""
357 349
358 Result does not include the null revision."""
359 visit = util.deque(revs)
360 seen = set([nullrev])
361 if inclusive:
362 for rev in revs:
363 yield rev
364 seen.update(revs)
365 while visit:
366 for parent in self.parentrevs(visit.popleft()):
367 if parent < stoprev:
368 continue
369 if parent not in seen:
370 visit.append(parent)
371 seen.add(parent)
372 yield parent
350 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
351 inclusive=inclusive)
373 352
374 353 def descendants(self, revs):
375 354 """Generate the descendants of 'revs' in revision order.
376 355
377 356 Yield a sequence of revision numbers starting with a child of
378 357 some rev in revs, i.e., each revision is *not* considered a
379 358 descendant of itself. Results are ordered by revision number (a
380 359 topological sort)."""
381 360 first = min(revs)
382 361 if first == nullrev:
383 362 for i in self:
384 363 yield i
385 364 return
386 365
387 366 seen = set(revs)
388 367 for i in self.revs(start=first + 1):
389 368 for x in self.parentrevs(i):
390 369 if x != nullrev and x in seen:
391 370 seen.add(i)
392 371 yield i
393 372 break
394 373
395 374 def findcommonmissing(self, common=None, heads=None):
396 375 """Return a tuple of the ancestors of common and the ancestors of heads
397 376 that are not ancestors of common. In revset terminology, we return the
398 377 tuple:
399 378
400 379 ::common, (::heads) - (::common)
401 380
402 381 The list is sorted by revision number, meaning it is
403 382 topologically sorted.
404 383
405 384 'heads' and 'common' are both lists of node IDs. If heads is
406 385 not supplied, uses all of the revlog's heads. If common is not
407 386 supplied, uses nullid."""
408 387 if common is None:
409 388 common = [nullid]
410 389 if heads is None:
411 390 heads = self.heads()
412 391
413 392 common = [self.rev(n) for n in common]
414 393 heads = [self.rev(n) for n in heads]
415 394
416 395 # we want the ancestors, but inclusive
417 396 has = set(self.ancestors(common))
418 397 has.add(nullrev)
419 398 has.update(common)
420 399
421 400 # take all ancestors from heads that aren't in has
422 401 missing = set()
423 402 visit = util.deque(r for r in heads if r not in has)
424 403 while visit:
425 404 r = visit.popleft()
426 405 if r in missing:
427 406 continue
428 407 else:
429 408 missing.add(r)
430 409 for p in self.parentrevs(r):
431 410 if p not in has:
432 411 visit.append(p)
433 412 missing = list(missing)
434 413 missing.sort()
435 414 return has, [self.node(r) for r in missing]
436 415
437 416 def findmissingrevs(self, common=None, heads=None):
438 417 """Return the revision numbers of the ancestors of heads that
439 418 are not ancestors of common.
440 419
441 420 More specifically, return a list of revision numbers corresponding to
442 421 nodes N such that every N satisfies the following constraints:
443 422
444 423 1. N is an ancestor of some node in 'heads'
445 424 2. N is not an ancestor of any node in 'common'
446 425
447 426 The list is sorted by revision number, meaning it is
448 427 topologically sorted.
449 428
450 429 'heads' and 'common' are both lists of revision numbers. If heads is
451 430 not supplied, uses all of the revlog's heads. If common is not
452 431 supplied, uses nullid."""
453 432 if common is None:
454 433 common = [nullrev]
455 434 if heads is None:
456 435 heads = self.headrevs()
457 436
458 437 return ancestor.missingancestors(heads, common, self.parentrevs)
459 438
460 439 def findmissing(self, common=None, heads=None):
461 440 """Return the ancestors of heads that are not ancestors of common.
462 441
463 442 More specifically, return a list of nodes N such that every N
464 443 satisfies the following constraints:
465 444
466 445 1. N is an ancestor of some node in 'heads'
467 446 2. N is not an ancestor of any node in 'common'
468 447
469 448 The list is sorted by revision number, meaning it is
470 449 topologically sorted.
471 450
472 451 'heads' and 'common' are both lists of node IDs. If heads is
473 452 not supplied, uses all of the revlog's heads. If common is not
474 453 supplied, uses nullid."""
475 454 if common is None:
476 455 common = [nullid]
477 456 if heads is None:
478 457 heads = self.heads()
479 458
480 459 common = [self.rev(n) for n in common]
481 460 heads = [self.rev(n) for n in heads]
482 461
483 462 return [self.node(r) for r in
484 463 ancestor.missingancestors(heads, common, self.parentrevs)]
485 464
486 465 def nodesbetween(self, roots=None, heads=None):
487 466 """Return a topological path from 'roots' to 'heads'.
488 467
489 468 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
490 469 topologically sorted list of all nodes N that satisfy both of
491 470 these constraints:
492 471
493 472 1. N is a descendant of some node in 'roots'
494 473 2. N is an ancestor of some node in 'heads'
495 474
496 475 Every node is considered to be both a descendant and an ancestor
497 476 of itself, so every reachable node in 'roots' and 'heads' will be
498 477 included in 'nodes'.
499 478
500 479 'outroots' is the list of reachable nodes in 'roots', i.e., the
501 480 subset of 'roots' that is returned in 'nodes'. Likewise,
502 481 'outheads' is the subset of 'heads' that is also in 'nodes'.
503 482
504 483 'roots' and 'heads' are both lists of node IDs. If 'roots' is
505 484 unspecified, uses nullid as the only root. If 'heads' is
506 485 unspecified, uses list of all of the revlog's heads."""
507 486 nonodes = ([], [], [])
508 487 if roots is not None:
509 488 roots = list(roots)
510 489 if not roots:
511 490 return nonodes
512 491 lowestrev = min([self.rev(n) for n in roots])
513 492 else:
514 493 roots = [nullid] # Everybody's a descendant of nullid
515 494 lowestrev = nullrev
516 495 if (lowestrev == nullrev) and (heads is None):
517 496 # We want _all_ the nodes!
518 497 return ([self.node(r) for r in self], [nullid], list(self.heads()))
519 498 if heads is None:
520 499 # All nodes are ancestors, so the latest ancestor is the last
521 500 # node.
522 501 highestrev = len(self) - 1
523 502 # Set ancestors to None to signal that every node is an ancestor.
524 503 ancestors = None
525 504 # Set heads to an empty dictionary for later discovery of heads
526 505 heads = {}
527 506 else:
528 507 heads = list(heads)
529 508 if not heads:
530 509 return nonodes
531 510 ancestors = set()
532 511 # Turn heads into a dictionary so we can remove 'fake' heads.
533 512 # Also, later we will be using it to filter out the heads we can't
534 513 # find from roots.
535 514 heads = dict.fromkeys(heads, False)
536 515 # Start at the top and keep marking parents until we're done.
537 516 nodestotag = set(heads)
538 517 # Remember where the top was so we can use it as a limit later.
539 518 highestrev = max([self.rev(n) for n in nodestotag])
540 519 while nodestotag:
541 520 # grab a node to tag
542 521 n = nodestotag.pop()
543 522 # Never tag nullid
544 523 if n == nullid:
545 524 continue
546 525 # A node's revision number represents its place in a
547 526 # topologically sorted list of nodes.
548 527 r = self.rev(n)
549 528 if r >= lowestrev:
550 529 if n not in ancestors:
551 530 # If we are possibly a descendant of one of the roots
552 531 # and we haven't already been marked as an ancestor
553 532 ancestors.add(n) # Mark as ancestor
554 533 # Add non-nullid parents to list of nodes to tag.
555 534 nodestotag.update([p for p in self.parents(n) if
556 535 p != nullid])
557 536 elif n in heads: # We've seen it before, is it a fake head?
558 537 # So it is, real heads should not be the ancestors of
559 538 # any other heads.
560 539 heads.pop(n)
561 540 if not ancestors:
562 541 return nonodes
563 542 # Now that we have our set of ancestors, we want to remove any
564 543 # roots that are not ancestors.
565 544
566 545 # If one of the roots was nullid, everything is included anyway.
567 546 if lowestrev > nullrev:
568 547 # But, since we weren't, let's recompute the lowest rev to not
569 548 # include roots that aren't ancestors.
570 549
571 550 # Filter out roots that aren't ancestors of heads
572 551 roots = [n for n in roots if n in ancestors]
573 552 # Recompute the lowest revision
574 553 if roots:
575 554 lowestrev = min([self.rev(n) for n in roots])
576 555 else:
577 556 # No more roots? Return empty list
578 557 return nonodes
579 558 else:
580 559 # We are descending from nullid, and don't need to care about
581 560 # any other roots.
582 561 lowestrev = nullrev
583 562 roots = [nullid]
584 563 # Transform our roots list into a set.
585 564 descendants = set(roots)
586 565 # Also, keep the original roots so we can filter out roots that aren't
587 566 # 'real' roots (i.e. are descended from other roots).
588 567 roots = descendants.copy()
589 568 # Our topologically sorted list of output nodes.
590 569 orderedout = []
591 570 # Don't start at nullid since we don't want nullid in our output list,
592 571 # and if nullid shows up in descendants, empty parents will look like
593 572 # they're descendants.
594 573 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
595 574 n = self.node(r)
596 575 isdescendant = False
597 576 if lowestrev == nullrev: # Everybody is a descendant of nullid
598 577 isdescendant = True
599 578 elif n in descendants:
600 579 # n is already a descendant
601 580 isdescendant = True
602 581 # This check only needs to be done here because all the roots
603 582 # will start being marked as descendants before the loop.
604 583 if n in roots:
605 584 # If n was a root, check if it's a 'real' root.
606 585 p = tuple(self.parents(n))
607 586 # If any of its parents are descendants, it's not a root.
608 587 if (p[0] in descendants) or (p[1] in descendants):
609 588 roots.remove(n)
610 589 else:
611 590 p = tuple(self.parents(n))
612 591 # A node is a descendant if either of its parents are
614 593 # descendants. (We seeded the descendants set with the roots
614 593 # up there, remember?)
615 594 if (p[0] in descendants) or (p[1] in descendants):
616 595 descendants.add(n)
617 596 isdescendant = True
618 597 if isdescendant and ((ancestors is None) or (n in ancestors)):
619 598 # Only include nodes that are both descendants and ancestors.
620 599 orderedout.append(n)
621 600 if (ancestors is not None) and (n in heads):
622 601 # We're trying to figure out which heads are reachable
623 602 # from roots.
624 603 # Mark this head as having been reached
625 604 heads[n] = True
626 605 elif ancestors is None:
627 606 # Otherwise, we're trying to discover the heads.
628 607 # Assume this is a head because if it isn't, the next step
629 608 # will eventually remove it.
630 609 heads[n] = True
631 610 # But, obviously its parents aren't.
632 611 for p in self.parents(n):
633 612 heads.pop(p, None)
634 613 heads = [n for n, flag in heads.iteritems() if flag]
635 614 roots = list(roots)
636 615 assert orderedout
637 616 assert roots
638 617 assert heads
639 618 return (orderedout, roots, heads)
640 619
641 620 def headrevs(self):
642 621 try:
643 622 return self.index.headrevs()
644 623 except AttributeError:
645 624 return self._headrevs()
646 625
647 626 def _headrevs(self):
648 627 count = len(self)
649 628 if not count:
650 629 return [nullrev]
651 630 # we won't iterate over filtered revs, so nobody is a head at start
652 631 ishead = [0] * (count + 1)
653 632 index = self.index
654 633 for r in self:
655 634 ishead[r] = 1 # I may be a head
656 635 e = index[r]
657 636 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
658 637 return [r for r, val in enumerate(ishead) if val]
659 638
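_headrevs allocates count + 1 slots so that a nullrev (-1) parent harmlessly clears the spare last slot; a hedged miniature of the same marking scheme on a made-up DAG:

    parentrevs = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
    count = len(parentrevs)
    ishead = [0] * (count + 1)          # index -1 lands on the spare slot
    for r in xrange(count):
        ishead[r] = 1                   # r may be a head
        p1, p2 = parentrevs[r]
        ishead[p1] = ishead[p2] = 0     # its parents are not
    assert [r for r, v in enumerate(ishead) if v] == [1, 2]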
660 639 def heads(self, start=None, stop=None):
661 640 """return the list of all nodes that have no children
662 641
663 642 if start is specified, only heads that are descendants of
664 643 start will be returned
665 644 if stop is specified, it will consider all the revs from stop
666 645 as if they had no children
667 646 """
668 647 if start is None and stop is None:
669 648 if not len(self):
670 649 return [nullid]
671 650 return [self.node(r) for r in self.headrevs()]
672 651
673 652 if start is None:
674 653 start = nullid
675 654 if stop is None:
676 655 stop = []
677 656 stoprevs = set([self.rev(n) for n in stop])
678 657 startrev = self.rev(start)
679 658 reachable = set((startrev,))
680 659 heads = set((startrev,))
681 660
682 661 parentrevs = self.parentrevs
683 662 for r in self.revs(start=startrev + 1):
684 663 for p in parentrevs(r):
685 664 if p in reachable:
686 665 if r not in stoprevs:
687 666 reachable.add(r)
688 667 heads.add(r)
689 668 if p in heads and p not in stoprevs:
690 669 heads.remove(p)
691 670
692 671 return [self.node(r) for r in heads]
693 672
694 673 def children(self, node):
695 674 """find the children of a given node"""
696 675 c = []
697 676 p = self.rev(node)
698 677 for r in self.revs(start=p + 1):
699 678 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
700 679 if prevs:
701 680 for pr in prevs:
702 681 if pr == p:
703 682 c.append(self.node(r))
704 683 elif p == nullrev:
705 684 c.append(self.node(r))
706 685 return c
707 686
708 687 def descendant(self, start, end):
709 688 if start == nullrev:
710 689 return True
711 690 for i in self.descendants([start]):
712 691 if i == end:
713 692 return True
714 693 elif i > end:
715 694 break
716 695 return False
717 696
718 697 def ancestor(self, a, b):
719 698 """calculate the least common ancestor of nodes a and b"""
720 699
721 700 # fast path, check if it is a descendant
722 701 a, b = self.rev(a), self.rev(b)
723 702 start, end = sorted((a, b))
724 703 if self.descendant(start, end):
725 704 return self.node(start)
726 705
727 706 def parents(rev):
728 707 return [p for p in self.parentrevs(rev) if p != nullrev]
729 708
730 709 c = ancestor.ancestor(a, b, parents)
731 710 if c is None:
732 711 return nullid
733 712
734 713 return self.node(c)
735 714
736 715 def _match(self, id):
737 716 if isinstance(id, int):
738 717 # rev
739 718 return self.node(id)
740 719 if len(id) == 20:
741 720 # possibly a binary node
742 721 # odds of a binary node being all hex in ASCII are 1 in 10**25
743 722 try:
744 723 node = id
745 724 self.rev(node) # quick search the index
746 725 return node
747 726 except LookupError:
748 727 pass # may be partial hex id
749 728 try:
750 729 # str(rev)
751 730 rev = int(id)
752 731 if str(rev) != id:
753 732 raise ValueError
754 733 if rev < 0:
755 734 rev = len(self) + rev
756 735 if rev < 0 or rev >= len(self):
757 736 raise ValueError
758 737 return self.node(rev)
759 738 except (ValueError, OverflowError):
760 739 pass
761 740 if len(id) == 40:
762 741 try:
763 742 # a full hex nodeid?
764 743 node = bin(id)
765 744 self.rev(node)
766 745 return node
767 746 except (TypeError, LookupError):
768 747 pass
769 748
770 749 def _partialmatch(self, id):
771 750 try:
772 751 return self.index.partialmatch(id)
773 752 except RevlogError:
774 753 # parsers.c radix tree lookup gave multiple matches
775 754 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
776 755 except (AttributeError, ValueError):
777 756 # we are pure python, or key was too short to search radix tree
778 757 pass
779 758
780 759 if id in self._pcache:
781 760 return self._pcache[id]
782 761
783 762 if len(id) < 40:
784 763 try:
785 764 # hex(node)[:...]
786 765 l = len(id) // 2 # grab an even number of digits
787 766 prefix = bin(id[:l * 2])
788 767 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
789 768 nl = [n for n in nl if hex(n).startswith(id)]
790 769 if len(nl) > 0:
791 770 if len(nl) == 1:
792 771 self._pcache[id] = nl[0]
793 772 return nl[0]
794 773 raise LookupError(id, self.indexfile,
795 774 _('ambiguous identifier'))
796 775 return None
797 776 except TypeError:
798 777 pass
799 778
800 779 def lookup(self, id):
801 780 """locate a node based on:
802 781 - revision number or str(revision number)
803 782 - nodeid or subset of hex nodeid
804 783 """
805 784 n = self._match(id)
806 785 if n is not None:
807 786 return n
808 787 n = self._partialmatch(id)
809 788 if n:
810 789 return n
811 790
812 791 raise LookupError(id, self.indexfile, _('no match found'))
813 792
814 793 def cmp(self, node, text):
815 794 """compare text with a given file revision
816 795
817 796 returns True if text is different than what is stored.
818 797 """
819 798 p1, p2 = self.parents(node)
820 799 return hash(text, p1, p2) != node
821 800
822 801 def _addchunk(self, offset, data):
823 802 o, d = self._chunkcache
824 803 # try to add to existing cache
825 804 if o + len(d) == offset and len(d) + len(data) < _chunksize:
826 805 self._chunkcache = o, d + data
827 806 else:
828 807 self._chunkcache = offset, data
829 808
830 809 def _loadchunk(self, offset, length):
831 810 if self._inline:
832 811 df = self.opener(self.indexfile)
833 812 else:
834 813 df = self.opener(self.datafile)
835 814
836 815 readahead = max(65536, length)
837 816 df.seek(offset)
838 817 d = df.read(readahead)
839 818 df.close()
840 819 self._addchunk(offset, d)
841 820 if readahead > length:
842 821 return util.buffer(d, 0, length)
843 822 return d
844 823
845 824 def _getchunk(self, offset, length):
846 825 o, d = self._chunkcache
847 826 l = len(d)
848 827
849 828 # is it in the cache?
850 829 cachestart = offset - o
851 830 cacheend = cachestart + length
852 831 if cachestart >= 0 and cacheend <= l:
853 832 if cachestart == 0 and cacheend == l:
854 833 return d # avoid a copy
855 834 return util.buffer(d, cachestart, cacheend - cachestart)
856 835
857 836 return self._loadchunk(offset, length)
858 837
859 838 def _chunkraw(self, startrev, endrev):
860 839 start = self.start(startrev)
861 840 length = self.end(endrev) - start
862 841 if self._inline:
863 842 start += (startrev + 1) * self._io.size
864 843 return self._getchunk(start, length)
865 844
866 845 def _chunk(self, rev):
867 846 return decompress(self._chunkraw(rev, rev))
868 847
869 848 def _chunkbase(self, rev):
870 849 return self._chunk(rev)
871 850
872 851 def _chunkclear(self):
873 852 self._chunkcache = (0, '')
874 853
875 854 def deltaparent(self, rev):
876 855 """return deltaparent of the given revision"""
877 856 base = self.index[rev][3]
878 857 if base == rev:
879 858 return nullrev
880 859 elif self._generaldelta:
881 860 return base
882 861 else:
883 862 return rev - 1
884 863
885 864 def revdiff(self, rev1, rev2):
886 865 """return or calculate a delta between two revisions"""
887 866 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
888 867 return str(self._chunk(rev2))
889 868
890 869 return mdiff.textdiff(self.revision(rev1),
891 870 self.revision(rev2))
892 871
893 872 def revision(self, nodeorrev):
894 873 """return an uncompressed revision of a given node or revision
895 874 number.
896 875 """
897 876 if isinstance(nodeorrev, int):
898 877 rev = nodeorrev
899 878 node = self.node(rev)
900 879 else:
901 880 node = nodeorrev
902 881 rev = None
903 882
904 883 cachedrev = None
905 884 if node == nullid:
906 885 return ""
907 886 if self._cache:
908 887 if self._cache[0] == node:
909 888 return self._cache[2]
910 889 cachedrev = self._cache[1]
911 890
912 891 # look up what we need to read
913 892 text = None
914 893 if rev is None:
915 894 rev = self.rev(node)
916 895
917 896 # check rev flags
918 897 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
919 898 raise RevlogError(_('incompatible revision flag %x') %
920 899 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
921 900
922 901 # build delta chain
923 902 chain = []
924 903 index = self.index # for performance
925 904 generaldelta = self._generaldelta
926 905 iterrev = rev
927 906 e = index[iterrev]
928 907 while iterrev != e[3] and iterrev != cachedrev:
929 908 chain.append(iterrev)
930 909 if generaldelta:
931 910 iterrev = e[3]
932 911 else:
933 912 iterrev -= 1
934 913 e = index[iterrev]
935 914 chain.reverse()
936 915 base = iterrev
937 916
938 917 if iterrev == cachedrev:
939 918 # cache hit
940 919 text = self._cache[2]
941 920
942 921 # drop cache to save memory
943 922 self._cache = None
944 923
945 924 self._chunkraw(base, rev)
946 925 if text is None:
947 926 text = str(self._chunkbase(base))
948 927
949 928 bins = [self._chunk(r) for r in chain]
950 929 text = mdiff.patches(text, bins)
951 930
952 931 text = self._checkhash(text, node, rev)
953 932
954 933 self._cache = (node, rev, text)
955 934 return text
956 935
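The chain walk above reduces to applying an ordered list of binary deltas to a base text; a hedged sketch with mdiff (assumed importable as mercurial.mdiff), where each delta is computed against the previous link in the chain:

    from mercurial import mdiff

    base = "line1\nline2\n"
    v2 = "line1\nline2 changed\n"
    v3 = "line1\nline2 changed\nline3\n"

    bins = [mdiff.textdiff(base, v2), mdiff.textdiff(v2, v3)]
    assert mdiff.patches(base, bins) == v3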
957 936 def _checkhash(self, text, node, rev):
958 937 p1, p2 = self.parents(node)
959 938 if node != hash(text, p1, p2):
960 939 raise RevlogError(_("integrity check failed on %s:%d")
961 940 % (self.indexfile, rev))
962 941 return text
963 942
964 943 def checkinlinesize(self, tr, fp=None):
965 944 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
966 945 return
967 946
968 947 trinfo = tr.find(self.indexfile)
969 948 if trinfo is None:
970 949 raise RevlogError(_("%s not found in the transaction")
971 950 % self.indexfile)
972 951
973 952 trindex = trinfo[2]
974 953 dataoff = self.start(trindex)
975 954
976 955 tr.add(self.datafile, dataoff)
977 956
978 957 if fp:
979 958 fp.flush()
980 959 fp.close()
981 960
982 961 df = self.opener(self.datafile, 'w')
983 962 try:
984 963 for r in self:
985 964 df.write(self._chunkraw(r, r))
986 965 finally:
987 966 df.close()
988 967
989 968 fp = self.opener(self.indexfile, 'w', atomictemp=True)
990 969 self.version &= ~(REVLOGNGINLINEDATA)
991 970 self._inline = False
992 971 for i in self:
993 972 e = self._io.packentry(self.index[i], self.node, self.version, i)
994 973 fp.write(e)
995 974
996 975 # if we don't call close, the temp file will never replace the
997 976 # real index
998 977 fp.close()
999 978
1000 979 tr.replace(self.indexfile, trindex * self._io.size)
1001 980 self._chunkclear()
1002 981
1003 982 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
1004 983 """add a revision to the log
1005 984
1006 985 text - the revision data to add
1007 986 transaction - the transaction object used for rollback
1008 987 link - the linkrev data to add
1009 988 p1, p2 - the parent nodeids of the revision
1010 989 cachedelta - an optional precomputed delta
1011 990 """
1012 991 node = hash(text, p1, p2)
1013 992 if node in self.nodemap:
1014 993 return node
1015 994
1016 995 dfh = None
1017 996 if not self._inline:
1018 997 dfh = self.opener(self.datafile, "a")
1019 998 ifh = self.opener(self.indexfile, "a+")
1020 999 try:
1021 1000 return self._addrevision(node, text, transaction, link, p1, p2,
1022 1001 cachedelta, ifh, dfh)
1023 1002 finally:
1024 1003 if dfh:
1025 1004 dfh.close()
1026 1005 ifh.close()
1027 1006
1028 1007 def compress(self, text):
1029 1008 """ generate a possibly-compressed representation of text """
1030 1009 if not text:
1031 1010 return ("", text)
1032 1011 l = len(text)
1033 1012 bin = None
1034 1013 if l < 44:
1035 1014 pass
1036 1015 elif l > 1000000:
1037 1016 # zlib makes an internal copy, thus doubling memory usage for
1038 1017 # large files, so let's do this in pieces
1039 1018 z = zlib.compressobj()
1040 1019 p = []
1041 1020 pos = 0
1042 1021 while pos < l:
1043 1022 pos2 = pos + 2**20
1044 1023 p.append(z.compress(text[pos:pos2]))
1045 1024 pos = pos2
1046 1025 p.append(z.flush())
1047 1026 if sum(map(len, p)) < l:
1048 1027 bin = "".join(p)
1049 1028 else:
1050 1029 bin = _compress(text)
1051 1030 if bin is None or len(bin) > l:
1052 1031 if text[0] == '\0':
1053 1032 return ("", text)
1054 1033 return ('u', text)
1055 1034 return ("", bin)
1056 1035
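decompress() near the top of this file keys on the first byte of each chunk, and compress() above guarantees it is '\0' (text that already starts with a null byte), 'u' (stored uncompressed), or 'x' (the leading byte of a zlib stream). A hedged sketch of that last convention:

    import zlib
    text = "some revision text " * 20
    blob = zlib.compress(text)
    assert blob[0] == 'x'               # why decompress() tests for 'x'
    assert zlib.decompress(blob) == text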
1057 1036 def _addrevision(self, node, text, transaction, link, p1, p2,
1058 1037 cachedelta, ifh, dfh):
1059 1038 """internal function to add revisions to the log
1060 1039
1061 1040 see addrevision for argument descriptions.
1062 1041 invariants:
1063 1042 - text is optional (can be None); if not set, cachedelta must be set.
1064 1043 if both are set, they must correspond to each other.
1065 1044 """
1066 1045 btext = [text]
1067 1046 def buildtext():
1068 1047 if btext[0] is not None:
1069 1048 return btext[0]
1070 1049 # flush any pending writes here so we can read it in revision
1071 1050 if dfh:
1072 1051 dfh.flush()
1073 1052 ifh.flush()
1074 1053 basetext = self.revision(self.node(cachedelta[0]))
1075 1054 btext[0] = mdiff.patch(basetext, cachedelta[1])
1076 1055 chk = hash(btext[0], p1, p2)
1077 1056 if chk != node:
1078 1057 raise RevlogError(_("consistency error in delta"))
1079 1058 return btext[0]
1080 1059
1081 1060 def builddelta(rev):
1082 1061 # can we use the cached delta?
1083 1062 if cachedelta and cachedelta[0] == rev:
1084 1063 delta = cachedelta[1]
1085 1064 else:
1086 1065 t = buildtext()
1087 1066 ptext = self.revision(self.node(rev))
1088 1067 delta = mdiff.textdiff(ptext, t)
1089 1068 data = self.compress(delta)
1090 1069 l = len(data[1]) + len(data[0])
1091 1070 if basecache[0] == rev:
1092 1071 chainbase = basecache[1]
1093 1072 else:
1094 1073 chainbase = self.chainbase(rev)
1095 1074 dist = l + offset - self.start(chainbase)
1096 1075 if self._generaldelta:
1097 1076 base = rev
1098 1077 else:
1099 1078 base = chainbase
1100 1079 return dist, l, data, base, chainbase
1101 1080
1102 1081 curr = len(self)
1103 1082 prev = curr - 1
1104 1083 base = chainbase = curr
1105 1084 offset = self.end(prev)
1106 1085 flags = 0
1107 1086 d = None
1108 1087 basecache = self._basecache
1109 1088 p1r, p2r = self.rev(p1), self.rev(p2)
1110 1089
1111 1090 # should we try to build a delta?
1112 1091 if prev != nullrev:
1113 1092 if self._generaldelta:
1114 1093 if p1r >= basecache[1]:
1115 1094 d = builddelta(p1r)
1116 1095 elif p2r >= basecache[1]:
1117 1096 d = builddelta(p2r)
1118 1097 else:
1119 1098 d = builddelta(prev)
1120 1099 else:
1121 1100 d = builddelta(prev)
1122 1101 dist, l, data, base, chainbase = d
1123 1102
1124 1103 # full versions are inserted when the needed deltas
1125 1104 # become comparable to the uncompressed text
1126 1105 if text is None:
1127 1106 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1128 1107 cachedelta[1])
1129 1108 else:
1130 1109 textlen = len(text)
1131 1110 if d is None or dist > textlen * 2:
1132 1111 text = buildtext()
1133 1112 data = self.compress(text)
1134 1113 l = len(data[1]) + len(data[0])
1135 1114 base = chainbase = curr
1136 1115
1137 1116 e = (offset_type(offset, flags), l, textlen,
1138 1117 base, link, p1r, p2r, node)
1139 1118 self.index.insert(-1, e)
1140 1119 self.nodemap[node] = curr
1141 1120
1142 1121 entry = self._io.packentry(e, self.node, self.version, curr)
1143 1122 if not self._inline:
1144 1123 transaction.add(self.datafile, offset)
1145 1124 transaction.add(self.indexfile, curr * len(entry))
1146 1125 if data[0]:
1147 1126 dfh.write(data[0])
1148 1127 dfh.write(data[1])
1149 1128 dfh.flush()
1150 1129 ifh.write(entry)
1151 1130 else:
1152 1131 offset += curr * self._io.size
1153 1132 transaction.add(self.indexfile, offset, curr)
1154 1133 ifh.write(entry)
1155 1134 ifh.write(data[0])
1156 1135 ifh.write(data[1])
1157 1136 self.checkinlinesize(transaction, ifh)
1158 1137
1159 1138 if type(text) == str: # only accept immutable objects
1160 1139 self._cache = (node, curr, text)
1161 1140 self._basecache = (curr, chainbase)
1162 1141 return node
1163 1142
1164 1143 def group(self, nodelist, bundler, reorder=None):
1165 1144 """Calculate a delta group, yielding a sequence of changegroup chunks
1166 1145 (strings).
1167 1146
1168 1147 Given a list of changeset revs, return a set of deltas and
1169 1148 metadata corresponding to nodes. The first delta is
1170 1149 first parent(nodelist[0]) -> nodelist[0], the receiver is
1171 1150 guaranteed to have this parent as it has all history before
1172 1151 these changesets. In the case firstparent is nullrev the
1173 1152 changegroup starts with a full revision.
1174 1153 """
1175 1154
1176 1155 # if we don't have any revisions touched by these changesets, bail
1177 1156 if len(nodelist) == 0:
1178 1157 yield bundler.close()
1179 1158 return
1180 1159
1181 1160 # for generaldelta revlogs, we linearize the revs; this will both be
1182 1161 # much quicker and generate a much smaller bundle
1183 1162 if (self._generaldelta and reorder is not False) or reorder:
1184 1163 dag = dagutil.revlogdag(self)
1185 1164 revs = set(self.rev(n) for n in nodelist)
1186 1165 revs = dag.linearize(revs)
1187 1166 else:
1188 1167 revs = sorted([self.rev(n) for n in nodelist])
1189 1168
1190 1169 # add the parent of the first rev
1191 1170 p = self.parentrevs(revs[0])[0]
1192 1171 revs.insert(0, p)
1193 1172
1194 1173 # build deltas
1195 1174 for r in xrange(len(revs) - 1):
1196 1175 prev, curr = revs[r], revs[r + 1]
1197 1176 for c in bundler.revchunk(self, curr, prev):
1198 1177 yield c
1199 1178
1200 1179 yield bundler.close()
1201 1180
1202 1181 def addgroup(self, bundle, linkmapper, transaction):
1203 1182 """
1204 1183 add a delta group
1205 1184
1206 1185 given a set of deltas, add them to the revision log. the
1207 1186 first delta is against its parent, which should be in our
1208 1187 log, the rest are against the previous delta.
1209 1188 """
1210 1189
1211 1190 # track the base of the current delta log
1212 1191 content = []
1213 1192 node = None
1214 1193
1215 1194 r = len(self)
1216 1195 end = 0
1217 1196 if r:
1218 1197 end = self.end(r - 1)
1219 1198 ifh = self.opener(self.indexfile, "a+")
1220 1199 isize = r * self._io.size
1221 1200 if self._inline:
1222 1201 transaction.add(self.indexfile, end + isize, r)
1223 1202 dfh = None
1224 1203 else:
1225 1204 transaction.add(self.indexfile, isize, r)
1226 1205 transaction.add(self.datafile, end)
1227 1206 dfh = self.opener(self.datafile, "a")
1228 1207
1229 1208 try:
1230 1209 # loop through our set of deltas
1231 1210 chain = None
1232 1211 while True:
1233 1212 chunkdata = bundle.deltachunk(chain)
1234 1213 if not chunkdata:
1235 1214 break
1236 1215 node = chunkdata['node']
1237 1216 p1 = chunkdata['p1']
1238 1217 p2 = chunkdata['p2']
1239 1218 cs = chunkdata['cs']
1240 1219 deltabase = chunkdata['deltabase']
1241 1220 delta = chunkdata['delta']
1242 1221
1243 1222 content.append(node)
1244 1223
1245 1224 link = linkmapper(cs)
1246 1225 if node in self.nodemap:
1247 1226 # this can happen if two branches make the same change
1248 1227 chain = node
1249 1228 continue
1250 1229
1251 1230 for p in (p1, p2):
1252 1231 if p not in self.nodemap:
1253 1232 raise LookupError(p, self.indexfile,
1254 1233 _('unknown parent'))
1255 1234
1256 1235 if deltabase not in self.nodemap:
1257 1236 raise LookupError(deltabase, self.indexfile,
1258 1237 _('unknown delta base'))
1259 1238
1260 1239 baserev = self.rev(deltabase)
1261 1240 chain = self._addrevision(node, None, transaction, link,
1262 1241 p1, p2, (baserev, delta), ifh, dfh)
1263 1242 if not dfh and not self._inline:
1264 1243 # addrevision switched from inline to conventional
1265 1244 # reopen the index
1266 1245 ifh.close()
1267 1246 dfh = self.opener(self.datafile, "a")
1268 1247 ifh = self.opener(self.indexfile, "a")
1269 1248 finally:
1270 1249 if dfh:
1271 1250 dfh.close()
1272 1251 ifh.close()
1273 1252
1274 1253 return content
1275 1254
1276 1255 def strip(self, minlink, transaction):
1277 1256 """truncate the revlog on the first revision with a linkrev >= minlink
1278 1257
1279 1258 This function is called when we're stripping revision minlink and
1280 1259 its descendants from the repository.
1281 1260
1282 1261 We have to remove all revisions with linkrev >= minlink, because
1283 1262 the equivalent changelog revisions will be renumbered after the
1284 1263 strip.
1285 1264
1286 1265 So we truncate the revlog on the first of these revisions, and
1287 1266 trust that the caller has saved the revisions that shouldn't be
1288 1267 removed and that it'll re-add them after this truncation.
1289 1268 """
1290 1269 if len(self) == 0:
1291 1270 return
1292 1271
1293 1272 for rev in self:
1294 1273 if self.index[rev][4] >= minlink:
1295 1274 break
1296 1275 else:
1297 1276 return
1298 1277
1299 1278 # first truncate the files on disk
1300 1279 end = self.start(rev)
1301 1280 if not self._inline:
1302 1281 transaction.add(self.datafile, end)
1303 1282 end = rev * self._io.size
1304 1283 else:
1305 1284 end += rev * self._io.size
1306 1285
1307 1286 transaction.add(self.indexfile, end)
1308 1287
1309 1288 # then reset internal state in memory to forget those revisions
1310 1289 self._cache = None
1311 1290 self._chunkclear()
1312 1291 for x in xrange(rev, len(self)):
1313 1292 del self.nodemap[self.node(x)]
1314 1293
1315 1294 del self.index[rev:-1]
1316 1295
1317 1296 def checksize(self):
1318 1297 expected = 0
1319 1298 if len(self):
1320 1299 expected = max(0, self.end(len(self) - 1))
1321 1300
1322 1301 try:
1323 1302 f = self.opener(self.datafile)
1324 1303 f.seek(0, 2)
1325 1304 actual = f.tell()
1326 1305 f.close()
1327 1306 dd = actual - expected
1328 1307 except IOError, inst:
1329 1308 if inst.errno != errno.ENOENT:
1330 1309 raise
1331 1310 dd = 0
1332 1311
1333 1312 try:
1334 1313 f = self.opener(self.indexfile)
1335 1314 f.seek(0, 2)
1336 1315 actual = f.tell()
1337 1316 f.close()
1338 1317 s = self._io.size
1339 1318 i = max(0, actual // s)
1340 1319 di = actual - (i * s)
1341 1320 if self._inline:
1342 1321 databytes = 0
1343 1322 for r in self:
1344 1323 databytes += max(0, self.length(r))
1345 1324 dd = 0
1346 1325 di = actual - len(self) * s - databytes
1347 1326 except IOError, inst:
1348 1327 if inst.errno != errno.ENOENT:
1349 1328 raise
1350 1329 di = 0
1351 1330
1352 1331 return (dd, di)
1353 1332
1354 1333 def files(self):
1355 1334 res = [self.indexfile]
1356 1335 if not self._inline:
1357 1336 res.append(self.datafile)
1358 1337 return res