revlog: improve delta generation heuristics for generaldelta...
Sune Foldager
r14301:f9499376 default
@@ -1,1265 +1,1270
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_KNOWN_FLAGS = 0

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)
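
# Illustrative sketch (editor's addition, not in the original file): the
# index packs a revision's data-file offset and its flag bits into a single
# integer, 48 bits of offset above 16 bits of flags:
#
#   q = offset_type(1024, 3)        # (1024 << 16) | 3
#   assert getoffset(q) == 1024
#   assert gettype(q) == 3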

nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
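
# Illustrative sketch (editor's addition, assuming only the standard hashlib
# module): since nullid is twenty zero bytes and always sorts first, hash()
# above is equivalent to hashing the sorted parents followed by the text;
# the nullhash.copy() branch is just a performance shortcut:
#
#   import hashlib
#   def nodehash(text, p1, p2):
#       pmin, pmax = sorted([p1, p2])
#       s = hashlib.sha1(pmin)
#       s.update(pmax)
#       s.update(text)
#       return s.digest()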

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
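
# Illustrative note (editor's addition): compress() and decompress() agree on
# a one-byte tag, and zlib output conveniently starts with 'x' so compressed
# chunks need no extra header:
#
#   ("", data)    stored as-is: empty, zlib data ('x...'), or text that
#                 already starts with '\0'
#   ("u", data)   'u' marks text that did not shrink under compression
#
# e.g. decompress("".join(compress("some text"))) == "some text"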

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
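
# Illustrative note (editor's addition): the RevlogNG layout above packs into
# 64 bytes per index entry:
#
#   Q       8 bytes   offset (high 48 bits) + flags (low 16 bits)
#   i * 6  24 bytes   lengths, base rev, link rev, parent revs
#   20s    20 bytes   binary nodeid
#   12x    12 bytes   zero padding (leaves room for 32-byte hashes)
#
#   struct.calcsize(">Qiiiiii20s12x") == 64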

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, None, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._basecache = (0, 0)
        self._chunkcache = (0, '')
        self.index = []
        self._pcache = {}
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, 'options'):
            if 'defversion' in opener.options:
                v = opener.options['defversion']
                if v & REVLOGNG:
                    v |= REVLOGNGINLINEDATA
                if v & REVLOGNG and 'generaldelta' in opener.options:
                    v |= REVLOGGENERALDELTA

        i = ''
        try:
            f = self.opener(self.indexfile)
            i = f.read()
            f.close()
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(i, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def rev(self, node):
        try:
            return self._nodecache[node]
        except KeyError:
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
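
    # Illustrative note (editor's addition): index entry [3] holds the base
    # revision of a delta chain, so chainbase() above just follows the
    # pointers until they stop moving; e.g. if index[5][3] == 3 and
    # index[3][3] == 3, then chainbase(5) == 3.
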
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        """Generate the ancestors of 'revs' in reverse topological order.

        Yield a sequence of revision numbers starting with the parents
        of each revision in revs, i.e., each revision is *not* considered
        an ancestor of itself. Results are in breadth-first order:
        parents of each rev in revs, then parents of those, etc. Result
        does not include the null revision."""
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def descendants(self, *revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in xrange(first + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common.

        More specifically, the second element is a list of nodes N such that
        every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        _common, missing = self.findcommonmissing(common, heads)
        return missing

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the set of descendants with the
                # roots up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        ishead = [1] * (count + 1)
        index = self.index
        for r in xrange(count):
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0
        return [r for r in xrange(count) if ishead[r]]
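
    # Illustrative note (editor's addition): headrevs() relies on parents
    # always having lower revision numbers than their children. Clearing the
    # flag of every revision that appears as a parent leaves exactly the
    # childless revisions; nullrev (-1) harmlessly indexes the extra slot at
    # the end of ishead.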

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, len(self)):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants(start):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        # fast path, check if it is a descendant
        a, b = self.rev(a), self.rev(b)
        start, end = sorted((a, b))
        if self.descendant(start, end):
            return self.node(start)

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(a, b, parents)
        if c is None:
            return nullid

        return self.node(c)

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length)
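
    # Illustrative note (editor's addition): the chunk cache is a single
    # (offset, data) window into the data file. A request for bytes
    # [offset, offset + length) is served from the window only when it lies
    # entirely inside it; otherwise _loadchunk reads at least 64kB ahead so
    # that walking a delta chain mostly hits the cache.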

    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkbase(self, rev):
        return self._chunk(rev)

    def _chunkclear(self):
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
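
    # Illustrative note (editor's addition): with generaldelta, entry [3]
    # names the delta parent directly, so a revision may delta against any
    # earlier revision (typically its first parent). In the classic layout
    # the delta parent is implicitly rev - 1 and entry [3] only records the
    # base of the chain.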

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]
        chain.reverse()
        base = iterrev

        if iterrev == cachedrev:
            # cache hit
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        self._chunkraw(base, rev)
        if text is None:
            text = self._chunkbase(base)

        bins = [self._chunk(r) for r in chain]
        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text
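
    # Illustrative sketch (editor's addition): reconstruction walks from the
    # requested rev back to a full text (or the cached text), then replays
    # the deltas forward. For a classic chain with base 2 and rev 5 this is
    # roughly:
    #
    #   text = decompress(chunk(2))          # full snapshot at the base
    #   for r in (3, 4, 5):                  # chain, oldest first
    #       text = mdiff.patch(text, decompress(chunk(r)))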
907
907
908 def _checkhash(self, text, node, rev):
908 def _checkhash(self, text, node, rev):
909 p1, p2 = self.parents(node)
909 p1, p2 = self.parents(node)
910 if node != hash(text, p1, p2):
910 if node != hash(text, p1, p2):
911 raise RevlogError(_("integrity check failed on %s:%d")
911 raise RevlogError(_("integrity check failed on %s:%d")
912 % (self.indexfile, rev))
912 % (self.indexfile, rev))
913 return text
913 return text
914
914
915 def checkinlinesize(self, tr, fp=None):
915 def checkinlinesize(self, tr, fp=None):
916 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
916 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
917 return
917 return
918
918
919 trinfo = tr.find(self.indexfile)
919 trinfo = tr.find(self.indexfile)
920 if trinfo is None:
920 if trinfo is None:
921 raise RevlogError(_("%s not found in the transaction")
921 raise RevlogError(_("%s not found in the transaction")
922 % self.indexfile)
922 % self.indexfile)
923
923
924 trindex = trinfo[2]
924 trindex = trinfo[2]
925 dataoff = self.start(trindex)
925 dataoff = self.start(trindex)
926
926
927 tr.add(self.datafile, dataoff)
927 tr.add(self.datafile, dataoff)
928
928
929 if fp:
929 if fp:
930 fp.flush()
930 fp.flush()
931 fp.close()
931 fp.close()
932
932
933 df = self.opener(self.datafile, 'w')
933 df = self.opener(self.datafile, 'w')
934 try:
934 try:
935 for r in self:
935 for r in self:
936 df.write(self._chunkraw(r, r))
936 df.write(self._chunkraw(r, r))
937 finally:
937 finally:
938 df.close()
938 df.close()
939
939
940 fp = self.opener(self.indexfile, 'w', atomictemp=True)
940 fp = self.opener(self.indexfile, 'w', atomictemp=True)
941 self.version &= ~(REVLOGNGINLINEDATA)
941 self.version &= ~(REVLOGNGINLINEDATA)
942 self._inline = False
942 self._inline = False
943 for i in self:
943 for i in self:
944 e = self._io.packentry(self.index[i], self.node, self.version, i)
944 e = self._io.packentry(self.index[i], self.node, self.version, i)
945 fp.write(e)
945 fp.write(e)
946
946
947 # if we don't call rename, the temp file will never replace the
947 # if we don't call rename, the temp file will never replace the
948 # real index
948 # real index
949 fp.rename()
949 fp.rename()
950
950
951 tr.replace(self.indexfile, trindex * self._io.size)
951 tr.replace(self.indexfile, trindex * self._io.size)
952 self._chunkclear()
952 self._chunkclear()
953
953
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        """
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read them in revision()
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            chk = hash(btext[0], p1, p2)
            if chk != node:
                raise RevlogError(_("consistency error in delta"))
            return btext[0]

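        # builddelta(rev) returns (dist, l, data, base, chainbase): 'data'
        # is the compressed delta against revision 'rev', 'l' its stored
        # length, 'base' the delta base recorded in the index, and 'dist'
        # the on-disk byte span from the start of the chain base to the end
        # of the new delta, which the snapshot heuristic below checks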
        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            return dist, l, data, base, chainbase

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        offset = self.end(prev)
        flags = 0
        d = None
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

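        # Heuristic note: with generaldelta the delta base can be any
        # earlier revision, so we first try a delta against p1 (or p2),
        # which tends to be smaller than one against the unrelated previous
        # revision. The basecache guard (only use a parent at or after the
        # last chain base) is apparently there to avoid pathological delta
        # chains; failing that, we fall back to the previous revision,
        # which is also the only choice without generaldelta.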
        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)
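        # e.g. for a 1000-byte text, any delta chain spanning more than
        # 2000 bytes on disk is abandoned and a full snapshot is stored,
        # which bounds the read cost of reconstructing a revision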
        if d is None or dist > textlen * 2:
            text = buildtext()
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
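        # the in-memory index ends with a sentinel entry for the null
        # revision, so insert(-1, ...) appends the new entry just before it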
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

    def group(self, nodelist, bundler):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0]; the receiver is
        guaranteed to have this parent, as it has all history before
        these changesets. If the first parent is nullrev, the
        changegroup starts with a full revision.
        """

        revs = sorted([self.rev(n) for n in nodelist])

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield bundler.close()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for r in xrange(len(revs) - 1):
            prev, curr = revs[r], revs[r + 1]
            for c in bundler.revchunk(self, curr, prev):
                yield c

        yield bundler.close()

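    # addgroup below is the receiving-side counterpart of group above: it
    # consumes the changegroup chunks that group emits via the bundler.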
    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta (or an
        explicitly provided delta base).
        """

        # track the base of the current delta log
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while 1:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

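        # entry field 4 of the index tuple is the linkrev; find the first
        # revision whose linkrev is >= minlink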
        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

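        # slice up to -1 so the sentinel null entry stays at the end of the
        # in-memory index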
        del self.index[rev:-1]

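    # checksize returns (dd, di): the number of surplus bytes in the data
    # file and the index file relative to what the index describes; (0, 0)
    # means the revlog looks consistent. Verification code uses this to
    # detect truncated or over-long files.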
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res