revlog: add __contains__ for fast membership test...
Yuya Nishihara
r24030:828dc8db default
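For context: when a class defines no __contains__, Python's "in" operator falls back to iterating the object, so "rev in revlog" previously cost a linear scan over every revision. The sketch below is illustrative only (toy classes, not Mercurial code); it shows why the two-line method added by this commit turns that scan into a constant-time bounds check.

# Toy illustration: "in" falls back to __iter__ when __contains__ is absent.
class WithoutContains(object):
    def __init__(self, n):
        self._n = n
    def __len__(self):
        return self._n
    def __iter__(self):                  # "rev in obj" walks every revision
        return iter(range(self._n))

class WithContains(WithoutContains):
    def __contains__(self, rev):         # "rev in obj" is now a bounds check
        return 0 <= rev < len(self)

big = WithContains(10 ** 6)
assert (10 ** 6 - 1) in big              # constant time, no million-step scan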
@@ -1,380 +1,385 @@ mercurial/changelog.py
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import bin, hex, nullid
9 9 from i18n import _
10 10 import util, error, revlog, encoding
11 11
12 12 _defaultextra = {'branch': 'default'}
13 13
14 14 def _string_escape(text):
15 15 """
16 16 >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
17 17 >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
18 18 >>> s
19 19 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
20 20 >>> res = _string_escape(s)
21 21 >>> s == res.decode('string_escape')
22 22 True
23 23 """
24 24 # subset of the string_escape codec
25 25 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
26 26 return text.replace('\0', '\\0')
27 27
28 28 def decodeextra(text):
29 29 """
30 30 >>> sorted(decodeextra(encodeextra({'foo': 'bar', 'baz': chr(0) + '2'})
31 31 ... ).iteritems())
32 32 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
33 33 >>> sorted(decodeextra(encodeextra({'foo': 'bar',
34 34 ... 'baz': chr(92) + chr(0) + '2'})
35 35 ... ).iteritems())
36 36 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
37 37 """
38 38 extra = _defaultextra.copy()
39 39 for l in text.split('\0'):
40 40 if l:
41 41 if '\\0' in l:
42 42 # fix up \0 without getting into trouble with \\0
43 43 l = l.replace('\\\\', '\\\\\n')
44 44 l = l.replace('\\0', '\0')
45 45 l = l.replace('\n', '')
46 46 k, v = l.decode('string_escape').split(':', 1)
47 47 extra[k] = v
48 48 return extra
49 49
50 50 def encodeextra(d):
51 51 # keys must be sorted to produce a deterministic changelog entry
52 52 items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
53 53 return "\0".join(items)
54 54
55 55 def stripdesc(desc):
56 56 """strip trailing whitespace and leading and trailing empty lines"""
57 57 return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
58 58
59 59 class appender(object):
60 60 '''the changelog index must be updated last on disk, so we use this class
61 61 to delay writes to it'''
62 62 def __init__(self, vfs, name, mode, buf):
63 63 self.data = buf
64 64 fp = vfs(name, mode)
65 65 self.fp = fp
66 66 self.offset = fp.tell()
67 67 self.size = vfs.fstat(fp).st_size
68 68
69 69 def end(self):
70 70 return self.size + len("".join(self.data))
71 71 def tell(self):
72 72 return self.offset
73 73 def flush(self):
74 74 pass
75 75 def close(self):
76 76 self.fp.close()
77 77
78 78 def seek(self, offset, whence=0):
79 79 '''virtual file offset spans real file and data'''
80 80 if whence == 0:
81 81 self.offset = offset
82 82 elif whence == 1:
83 83 self.offset += offset
84 84 elif whence == 2:
85 85 self.offset = self.end() + offset
86 86 if self.offset < self.size:
87 87 self.fp.seek(self.offset)
88 88
89 89 def read(self, count=-1):
90 90 '''only trick here is reads that span real file and data'''
91 91 ret = ""
92 92 if self.offset < self.size:
93 93 s = self.fp.read(count)
94 94 ret = s
95 95 self.offset += len(s)
96 96 if count > 0:
97 97 count -= len(s)
98 98 if count != 0:
99 99 doff = self.offset - self.size
100 100 self.data.insert(0, "".join(self.data))
101 101 del self.data[1:]
102 102 s = self.data[0][doff:doff + count]
103 103 self.offset += len(s)
104 104 ret += s
105 105 return ret
106 106
107 107 def write(self, s):
108 108 self.data.append(str(s))
109 109 self.offset += len(s)
110 110
111 111 def _divertopener(opener, target):
112 112 """build an opener that writes in 'target.a' instead of 'target'"""
113 113 def _divert(name, mode='r'):
114 114 if name != target:
115 115 return opener(name, mode)
116 116 return opener(name + ".a", mode)
117 117 return _divert
118 118
119 119 def _delayopener(opener, target, buf):
120 120 """build an opener that stores chunks in 'buf' instead of 'target'"""
121 121 def _delay(name, mode='r'):
122 122 if name != target:
123 123 return opener(name, mode)
124 124 return appender(opener, name, mode, buf)
125 125 return _delay
126 126
127 127 class changelog(revlog.revlog):
128 128 def __init__(self, opener):
129 129 revlog.revlog.__init__(self, opener, "00changelog.i")
130 130 if self._initempty:
131 131 # changelogs don't benefit from generaldelta
132 132 self.version &= ~revlog.REVLOGGENERALDELTA
133 133 self._generaldelta = False
134 134 self._realopener = opener
135 135 self._delayed = False
136 136 self._delaybuf = None
137 137 self._divert = False
138 138 self.filteredrevs = frozenset()
139 139
140 140 def tip(self):
141 141 """filtered version of revlog.tip"""
142 142 for i in xrange(len(self) -1, -2, -1):
143 143 if i not in self.filteredrevs:
144 144 return self.node(i)
145 145
146 def __contains__(self, rev):
147 """filtered version of revlog.__contains__"""
148 return (revlog.revlog.__contains__(self, rev)
149 and rev not in self.filteredrevs)
150
146 151 def __iter__(self):
147 152 """filtered version of revlog.__iter__"""
148 153 if len(self.filteredrevs) == 0:
149 154 return revlog.revlog.__iter__(self)
150 155
151 156 def filterediter():
152 157 for i in xrange(len(self)):
153 158 if i not in self.filteredrevs:
154 159 yield i
155 160
156 161 return filterediter()
157 162
158 163 def revs(self, start=0, stop=None):
159 164 """filtered version of revlog.revs"""
160 165 for i in super(changelog, self).revs(start, stop):
161 166 if i not in self.filteredrevs:
162 167 yield i
163 168
164 169 @util.propertycache
165 170 def nodemap(self):
166 171 # XXX need filtering too
167 172 self.rev(self.node(0))
168 173 return self._nodecache
169 174
170 175 def hasnode(self, node):
171 176 """filtered version of revlog.hasnode"""
172 177 try:
173 178 i = self.rev(node)
174 179 return i not in self.filteredrevs
175 180 except KeyError:
176 181 return False
177 182
178 183 def headrevs(self):
179 184 if self.filteredrevs:
180 185 try:
181 186 return self.index.headrevsfiltered(self.filteredrevs)
182 187 # AttributeError covers non-c-extension environments and
183 188 # old c extensions without filter handling.
184 189 except AttributeError:
185 190 return self._headrevs()
186 191
187 192 return super(changelog, self).headrevs()
188 193
189 194 def strip(self, *args, **kwargs):
190 195 # XXX make something better than assert
191 196 # We can't expect proper strip behavior if we are filtered.
192 197 assert not self.filteredrevs
193 198 super(changelog, self).strip(*args, **kwargs)
194 199
195 200 def rev(self, node):
196 201 """filtered version of revlog.rev"""
197 202 r = super(changelog, self).rev(node)
198 203 if r in self.filteredrevs:
199 204 raise error.FilteredLookupError(hex(node), self.indexfile,
200 205 _('filtered node'))
201 206 return r
202 207
203 208 def node(self, rev):
204 209 """filtered version of revlog.node"""
205 210 if rev in self.filteredrevs:
206 211 raise error.FilteredIndexError(rev)
207 212 return super(changelog, self).node(rev)
208 213
209 214 def linkrev(self, rev):
210 215 """filtered version of revlog.linkrev"""
211 216 if rev in self.filteredrevs:
212 217 raise error.FilteredIndexError(rev)
213 218 return super(changelog, self).linkrev(rev)
214 219
215 220 def parentrevs(self, rev):
216 221 """filtered version of revlog.parentrevs"""
217 222 if rev in self.filteredrevs:
218 223 raise error.FilteredIndexError(rev)
219 224 return super(changelog, self).parentrevs(rev)
220 225
221 226 def flags(self, rev):
222 227 """filtered version of revlog.flags"""
223 228 if rev in self.filteredrevs:
224 229 raise error.FilteredIndexError(rev)
225 230 return super(changelog, self).flags(rev)
226 231
227 232 def delayupdate(self, tr):
228 233 "delay visibility of index updates to other readers"
229 234
230 235 if not self._delayed:
231 236 if len(self) == 0:
232 237 self._divert = True
233 238 if self._realopener.exists(self.indexfile + '.a'):
234 239 self._realopener.unlink(self.indexfile + '.a')
235 240 self.opener = _divertopener(self._realopener, self.indexfile)
236 241 else:
237 242 self._delaybuf = []
238 243 self.opener = _delayopener(self._realopener, self.indexfile,
239 244 self._delaybuf)
240 245 self._delayed = True
241 246 tr.addpending('cl-%i' % id(self), self._writepending)
242 247 tr.addfinalize('cl-%i' % id(self), self._finalize)
243 248
244 249 def _finalize(self, tr):
245 250 "finalize index updates"
246 251 self._delayed = False
247 252 self.opener = self._realopener
248 253 # move redirected index data back into place
249 254 if self._divert:
250 255 assert not self._delaybuf
251 256 tmpname = self.indexfile + ".a"
252 257 nfile = self.opener.open(tmpname)
253 258 nfile.close()
254 259 self.opener.rename(tmpname, self.indexfile)
255 260 elif self._delaybuf:
256 261 fp = self.opener(self.indexfile, 'a')
257 262 fp.write("".join(self._delaybuf))
258 263 fp.close()
259 264 self._delaybuf = None
260 265 self._divert = False
261 266 # split when we're done
262 267 self.checkinlinesize(tr)
263 268
264 269 def readpending(self, file):
265 270 r = revlog.revlog(self.opener, file)
266 271 self.index = r.index
267 272 self.nodemap = r.nodemap
268 273 self._nodecache = r._nodecache
269 274 self._chunkcache = r._chunkcache
270 275
271 276 def _writepending(self, tr):
272 277 "create a file containing the unfinalized state for pretxnchangegroup"
273 278 if self._delaybuf:
274 279 # make a temporary copy of the index
275 280 fp1 = self._realopener(self.indexfile)
276 281 pendingfilename = self.indexfile + ".a"
277 282 # register as a temp file to ensure cleanup on failure
278 283 tr.registertmp(pendingfilename)
279 284 # write existing data
280 285 fp2 = self._realopener(pendingfilename, "w")
281 286 fp2.write(fp1.read())
282 287 # add pending data
283 288 fp2.write("".join(self._delaybuf))
284 289 fp2.close()
285 290 # switch modes so finalize can simply rename
286 291 self._delaybuf = None
287 292 self._divert = True
288 293 self.opener = _divertopener(self._realopener, self.indexfile)
289 294
290 295 if self._divert:
291 296 return True
292 297
293 298 return False
294 299
295 300 def checkinlinesize(self, tr, fp=None):
296 301 if not self._delayed:
297 302 revlog.revlog.checkinlinesize(self, tr, fp)
298 303
299 304 def read(self, node):
300 305 """
301 306 format used:
302 307 nodeid\n : manifest node in ascii
303 308 user\n : user, no \n or \r allowed
304 309 time tz extra\n : date (time is int or float, timezone is int)
305 310 : extra is metadata, encoded and separated by '\0'
306 311 : older versions ignore it
307 312 files\n\n : files modified by the cset, no \n or \r allowed
308 313 (.*) : comment (free text, ideally utf-8)
309 314
310 315 changelog v0 doesn't use extra
311 316 """
312 317 text = self.revision(node)
313 318 if not text:
314 319 return (nullid, "", (0, 0), [], "", _defaultextra)
315 320 last = text.index("\n\n")
316 321 desc = encoding.tolocal(text[last + 2:])
317 322 l = text[:last].split('\n')
318 323 manifest = bin(l[0])
319 324 user = encoding.tolocal(l[1])
320 325
321 326 tdata = l[2].split(' ', 2)
322 327 if len(tdata) != 3:
323 328 time = float(tdata[0])
324 329 try:
325 330 # various tools did silly things with the time zone field.
326 331 timezone = int(tdata[1])
327 332 except ValueError:
328 333 timezone = 0
329 334 extra = _defaultextra
330 335 else:
331 336 time, timezone = float(tdata[0]), int(tdata[1])
332 337 extra = decodeextra(tdata[2])
333 338
334 339 files = l[3:]
335 340 return (manifest, user, (time, timezone), files, desc, extra)
336 341
337 342 def add(self, manifest, files, desc, transaction, p1, p2,
338 343 user, date=None, extra=None):
339 344 # Convert to UTF-8 encoded bytestrings as the very first
340 345 # thing: calling any method on a localstr object will turn it
341 346 # into a str object and the cached UTF-8 string is thus lost.
342 347 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
343 348
344 349 user = user.strip()
345 350 # An empty username or a username with a "\n" will make the
346 351 # revision text contain two "\n\n" sequences -> corrupt
347 352 # repository since read cannot unpack the revision.
348 353 if not user:
349 354 raise error.RevlogError(_("empty username"))
350 355 if "\n" in user:
351 356 raise error.RevlogError(_("username %s contains a newline")
352 357 % repr(user))
353 358
354 359 desc = stripdesc(desc)
355 360
356 361 if date:
357 362 parseddate = "%d %d" % util.parsedate(date)
358 363 else:
359 364 parseddate = "%d %d" % util.makedate()
360 365 if extra:
361 366 branch = extra.get("branch")
362 367 if branch in ("default", ""):
363 368 del extra["branch"]
364 369 elif branch in (".", "null", "tip"):
365 370 raise error.RevlogError(_('the name \'%s\' is reserved')
366 371 % branch)
367 372 if extra:
368 373 extra = encodeextra(extra)
369 374 parseddate = "%s %s" % (parseddate, extra)
370 375 l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
371 376 text = "\n".join(l)
372 377 return self.addrevision(text, transaction, len(self), p1, p2)
373 378
374 379 def branchinfo(self, rev):
375 380 """return the branch name and open/close state of a revision
376 381
377 382 This function exists because creating a changectx object
378 383 just to access this is costly."""
379 384 extra = self.read(rev)[5]
380 385 return encoding.tolocal(extra.get("branch")), 'close' in extra
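The changelog override above layers repoview filtering on top of the new revlog fast path, so hidden revisions report as absent from membership tests. Here is a self-contained sketch of that composition; the Fake* class names are hypothetical stand-ins, not the real classes.

class FakeRevlog(object):
    def __init__(self, count):
        self._count = count
    def __len__(self):
        return self._count
    def __contains__(self, rev):          # revlog-level bounds check
        return 0 <= rev < len(self)

class FakeChangelog(FakeRevlog):
    def __init__(self, count, filteredrevs=()):
        FakeRevlog.__init__(self, count)
        self.filteredrevs = frozenset(filteredrevs)
    def __contains__(self, rev):          # filtered version, as in the diff
        return (FakeRevlog.__contains__(self, rev)
                and rev not in self.filteredrevs)

cl = FakeChangelog(5, filteredrevs=[3])
assert 2 in cl                            # visible revision
assert 3 not in cl                        # filtered (e.g. hidden) revision
assert 7 not in cl                        # out of range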
@@ -1,1541 +1,1543 @@ mercurial/revlog.py
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
38 38 REVIDX_DEFAULT_FLAGS = 0
39 39 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
40 40
41 41 # max size of revlog with inline data
42 42 _maxinline = 131072
43 43 _chunksize = 1048576
44 44
45 45 RevlogError = error.RevlogError
46 46 LookupError = error.LookupError
47 47 CensoredNodeError = error.CensoredNodeError
48 48
49 49 def getoffset(q):
50 50 return int(q >> 16)
51 51
52 52 def gettype(q):
53 53 return int(q & 0xFFFF)
54 54
55 55 def offset_type(offset, type):
56 56 return long(long(offset) << 16 | type)
57 57
58 58 _nullhash = _sha(nullid)
59 59
60 60 def hash(text, p1, p2):
61 61 """generate a hash from the given text and its parent hashes
62 62
63 63 This hash combines both the current file contents and its history
64 64 in a manner that makes it easy to distinguish nodes with the same
65 65 content in the revision graph.
66 66 """
67 67 # As of now, if one of the parent nodes is null, p2 is null
68 68 if p2 == nullid:
69 69 # deep copy of a hash is faster than creating one
70 70 s = _nullhash.copy()
71 71 s.update(p1)
72 72 else:
73 73 # none of the parent nodes are nullid
74 74 l = [p1, p2]
75 75 l.sort()
76 76 s = _sha(l[0])
77 77 s.update(l[1])
78 78 s.update(text)
79 79 return s.digest()
80 80
81 81 def decompress(bin):
82 82 """ decompress the given input """
83 83 if not bin:
84 84 return bin
85 85 t = bin[0]
86 86 if t == '\0':
87 87 return bin
88 88 if t == 'x':
89 89 try:
90 90 return _decompress(bin)
91 91 except zlib.error, e:
92 92 raise RevlogError(_("revlog decompress error: %s") % str(e))
93 93 if t == 'u':
94 94 return bin[1:]
95 95 raise RevlogError(_("unknown compression type %r") % t)
96 96
97 97 # index v0:
98 98 # 4 bytes: offset
99 99 # 4 bytes: compressed length
100 100 # 4 bytes: base rev
101 101 # 4 bytes: link rev
102 102 # 32 bytes: parent 1 nodeid
103 103 # 32 bytes: parent 2 nodeid
104 104 # 32 bytes: nodeid
105 105 indexformatv0 = ">4l20s20s20s"
106 106 v0shaoffset = 56
107 107
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 ngshaoffset = 32
153 153 versionformat = ">I"
154 154
155 155 class revlogio(object):
156 156 def __init__(self):
157 157 self.size = struct.calcsize(indexformatng)
158 158
159 159 def parseindex(self, data, inline):
160 160 # call the C implementation to parse the index data
161 161 index, cache = parsers.parse_index2(data, inline)
162 162 return index, getattr(index, 'nodemap', None), cache
163 163
164 164 def packentry(self, entry, node, version, rev):
165 165 p = _pack(indexformatng, *entry)
166 166 if rev == 0:
167 167 p = _pack(versionformat, version) + p[4:]
168 168 return p
169 169
170 170 class revlog(object):
171 171 """
172 172 the underlying revision storage object
173 173
174 174 A revlog consists of two parts, an index and the revision data.
175 175
176 176 The index is a file with a fixed record size containing
177 177 information on each revision, including its nodeid (hash), the
178 178 nodeids of its parents, the position and offset of its data within
179 179 the data file, and the revision it's based on. Finally, each entry
180 180 contains a linkrev entry that can serve as a pointer to external
181 181 data.
182 182
183 183 The revision data itself is a linear collection of data chunks.
184 184 Each chunk represents a revision and is usually represented as a
185 185 delta against the previous chunk. To bound lookup time, runs of
186 186 deltas are limited to about 2 times the length of the original
187 187 version data. This makes retrieval of a version proportional to
188 188 its size, or O(1) relative to the number of revisions.
189 189
190 190 Both pieces of the revlog are written to in an append-only
191 191 fashion, which means we never need to rewrite a file to insert or
192 192 remove data, and can use some simple techniques to avoid the need
193 193 for locking while reading.
194 194 """
195 195 def __init__(self, opener, indexfile):
196 196 """
197 197 create a revlog object
198 198
199 199 opener is a function that abstracts the file opening operation
200 200 and can be used to implement COW semantics or the like.
201 201 """
202 202 self.indexfile = indexfile
203 203 self.datafile = indexfile[:-2] + ".d"
204 204 self.opener = opener
205 205 self._cache = None
206 206 self._basecache = None
207 207 self._chunkcache = (0, '')
208 208 self._chunkcachesize = 65536
209 209 self._maxchainlen = None
210 210 self.index = []
211 211 self._pcache = {}
212 212 self._nodecache = {nullid: nullrev}
213 213 self._nodepos = None
214 214
215 215 v = REVLOG_DEFAULT_VERSION
216 216 opts = getattr(opener, 'options', None)
217 217 if opts is not None:
218 218 if 'revlogv1' in opts:
219 219 if 'generaldelta' in opts:
220 220 v |= REVLOGGENERALDELTA
221 221 else:
222 222 v = 0
223 223 if 'chunkcachesize' in opts:
224 224 self._chunkcachesize = opts['chunkcachesize']
225 225 if 'maxchainlen' in opts:
226 226 self._maxchainlen = opts['maxchainlen']
227 227
228 228 if self._chunkcachesize <= 0:
229 229 raise RevlogError(_('revlog chunk cache size %r is not greater '
230 230 'than 0') % self._chunkcachesize)
231 231 elif self._chunkcachesize & (self._chunkcachesize - 1):
232 232 raise RevlogError(_('revlog chunk cache size %r is not a power '
233 233 'of 2') % self._chunkcachesize)
234 234
235 235 i = ''
236 236 self._initempty = True
237 237 try:
238 238 f = self.opener(self.indexfile)
239 239 i = f.read()
240 240 f.close()
241 241 if len(i) > 0:
242 242 v = struct.unpack(versionformat, i[:4])[0]
243 243 self._initempty = False
244 244 except IOError, inst:
245 245 if inst.errno != errno.ENOENT:
246 246 raise
247 247
248 248 self.version = v
249 249 self._inline = v & REVLOGNGINLINEDATA
250 250 self._generaldelta = v & REVLOGGENERALDELTA
251 251 flags = v & ~0xFFFF
252 252 fmt = v & 0xFFFF
253 253 if fmt == REVLOGV0 and flags:
254 254 raise RevlogError(_("index %s unknown flags %#04x for format v0")
255 255 % (self.indexfile, flags >> 16))
256 256 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
257 257 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
258 258 % (self.indexfile, flags >> 16))
259 259 elif fmt > REVLOGNG:
260 260 raise RevlogError(_("index %s unknown format %d")
261 261 % (self.indexfile, fmt))
262 262
263 263 self._io = revlogio()
264 264 if self.version == REVLOGV0:
265 265 self._io = revlogoldio()
266 266 try:
267 267 d = self._io.parseindex(i, self._inline)
268 268 except (ValueError, IndexError):
269 269 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
270 270 self.index, nodemap, self._chunkcache = d
271 271 if nodemap is not None:
272 272 self.nodemap = self._nodecache = nodemap
273 273 if not self._chunkcache:
274 274 self._chunkclear()
275 275 # revnum -> (chain-length, sum-delta-length)
276 276 self._chaininfocache = {}
277 277
278 278 def tip(self):
279 279 return self.node(len(self.index) - 2)
280 def __contains__(self, rev):
281 return 0 <= rev < len(self)
280 282 def __len__(self):
281 283 return len(self.index) - 1
282 284 def __iter__(self):
283 285 return iter(xrange(len(self)))
284 286 def revs(self, start=0, stop=None):
285 287 """iterate over all rev in this revlog (from start to stop)"""
286 288 step = 1
287 289 if stop is not None:
288 290 if start > stop:
289 291 step = -1
290 292 stop += step
291 293 else:
292 294 stop = len(self)
293 295 return xrange(start, stop, step)
294 296
295 297 @util.propertycache
296 298 def nodemap(self):
297 299 self.rev(self.node(0))
298 300 return self._nodecache
299 301
300 302 def hasnode(self, node):
301 303 try:
302 304 self.rev(node)
303 305 return True
304 306 except KeyError:
305 307 return False
306 308
307 309 def clearcaches(self):
308 310 try:
309 311 self._nodecache.clearcaches()
310 312 except AttributeError:
311 313 self._nodecache = {nullid: nullrev}
312 314 self._nodepos = None
313 315
314 316 def rev(self, node):
315 317 try:
316 318 return self._nodecache[node]
317 319 except TypeError:
318 320 raise
319 321 except RevlogError:
320 322 # parsers.c radix tree lookup failed
321 323 raise LookupError(node, self.indexfile, _('no node'))
322 324 except KeyError:
323 325 # pure python cache lookup failed
324 326 n = self._nodecache
325 327 i = self.index
326 328 p = self._nodepos
327 329 if p is None:
328 330 p = len(i) - 2
329 331 for r in xrange(p, -1, -1):
330 332 v = i[r][7]
331 333 n[v] = r
332 334 if v == node:
333 335 self._nodepos = r - 1
334 336 return r
335 337 raise LookupError(node, self.indexfile, _('no node'))
336 338
337 339 def node(self, rev):
338 340 return self.index[rev][7]
339 341 def linkrev(self, rev):
340 342 return self.index[rev][4]
341 343 def parents(self, node):
342 344 i = self.index
343 345 d = i[self.rev(node)]
344 346 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
345 347 def parentrevs(self, rev):
346 348 return self.index[rev][5:7]
347 349 def start(self, rev):
348 350 return int(self.index[rev][0] >> 16)
349 351 def end(self, rev):
350 352 return self.start(rev) + self.length(rev)
351 353 def length(self, rev):
352 354 return self.index[rev][1]
353 355 def chainbase(self, rev):
354 356 index = self.index
355 357 base = index[rev][3]
356 358 while base != rev:
357 359 rev = base
358 360 base = index[rev][3]
359 361 return base
360 362 def chainlen(self, rev):
361 363 return self._chaininfo(rev)[0]
362 364
363 365 def _chaininfo(self, rev):
364 366 chaininfocache = self._chaininfocache
365 367 if rev in chaininfocache:
366 368 return chaininfocache[rev]
367 369 index = self.index
368 370 generaldelta = self._generaldelta
369 371 iterrev = rev
370 372 e = index[iterrev]
371 373 clen = 0
372 374 compresseddeltalen = 0
373 375 while iterrev != e[3]:
374 376 clen += 1
375 377 compresseddeltalen += e[1]
376 378 if generaldelta:
377 379 iterrev = e[3]
378 380 else:
379 381 iterrev -= 1
380 382 if iterrev in chaininfocache:
381 383 t = chaininfocache[iterrev]
382 384 clen += t[0]
383 385 compresseddeltalen += t[1]
384 386 break
385 387 e = index[iterrev]
386 388 else:
387 389 # Add text length of base since decompressing that also takes
388 390 # work. For cache hits the length is already included.
389 391 compresseddeltalen += e[1]
390 392 r = (clen, compresseddeltalen)
391 393 chaininfocache[rev] = r
392 394 return r
393 395
394 396 def flags(self, rev):
395 397 return self.index[rev][0] & 0xFFFF
396 398 def rawsize(self, rev):
397 399 """return the length of the uncompressed text for a given revision"""
398 400 l = self.index[rev][2]
399 401 if l >= 0:
400 402 return l
401 403
402 404 t = self.revision(self.node(rev))
403 405 return len(t)
404 406 size = rawsize
405 407
406 408 def ancestors(self, revs, stoprev=0, inclusive=False):
407 409 """Generate the ancestors of 'revs' in reverse topological order.
408 410 Does not generate revs lower than stoprev.
409 411
410 412 See the documentation for ancestor.lazyancestors for more details."""
411 413
412 414 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
413 415 inclusive=inclusive)
414 416
415 417 def descendants(self, revs):
416 418 """Generate the descendants of 'revs' in revision order.
417 419
418 420 Yield a sequence of revision numbers starting with a child of
419 421 some rev in revs, i.e., each revision is *not* considered a
420 422 descendant of itself. Results are ordered by revision number (a
421 423 topological sort)."""
422 424 first = min(revs)
423 425 if first == nullrev:
424 426 for i in self:
425 427 yield i
426 428 return
427 429
428 430 seen = set(revs)
429 431 for i in self.revs(start=first + 1):
430 432 for x in self.parentrevs(i):
431 433 if x != nullrev and x in seen:
432 434 seen.add(i)
433 435 yield i
434 436 break
435 437
436 438 def findcommonmissing(self, common=None, heads=None):
437 439 """Return a tuple of the ancestors of common and the ancestors of heads
438 440 that are not ancestors of common. In revset terminology, we return the
439 441 tuple:
440 442
441 443 ::common, (::heads) - (::common)
442 444
443 445 The list is sorted by revision number, meaning it is
444 446 topologically sorted.
445 447
446 448 'heads' and 'common' are both lists of node IDs. If heads is
447 449 not supplied, uses all of the revlog's heads. If common is not
448 450 supplied, uses nullid."""
449 451 if common is None:
450 452 common = [nullid]
451 453 if heads is None:
452 454 heads = self.heads()
453 455
454 456 common = [self.rev(n) for n in common]
455 457 heads = [self.rev(n) for n in heads]
456 458
457 459 # we want the ancestors, but inclusive
458 460 class lazyset(object):
459 461 def __init__(self, lazyvalues):
460 462 self.addedvalues = set()
461 463 self.lazyvalues = lazyvalues
462 464
463 465 def __contains__(self, value):
464 466 return value in self.addedvalues or value in self.lazyvalues
465 467
466 468 def __iter__(self):
467 469 added = self.addedvalues
468 470 for r in added:
469 471 yield r
470 472 for r in self.lazyvalues:
471 473 if not r in added:
472 474 yield r
473 475
474 476 def add(self, value):
475 477 self.addedvalues.add(value)
476 478
477 479 def update(self, values):
478 480 self.addedvalues.update(values)
479 481
480 482 has = lazyset(self.ancestors(common))
481 483 has.add(nullrev)
482 484 has.update(common)
483 485
484 486 # take all ancestors from heads that aren't in has
485 487 missing = set()
486 488 visit = util.deque(r for r in heads if r not in has)
487 489 while visit:
488 490 r = visit.popleft()
489 491 if r in missing:
490 492 continue
491 493 else:
492 494 missing.add(r)
493 495 for p in self.parentrevs(r):
494 496 if p not in has:
495 497 visit.append(p)
496 498 missing = list(missing)
497 499 missing.sort()
498 500 return has, [self.node(r) for r in missing]
499 501
500 502 def incrementalmissingrevs(self, common=None):
501 503 """Return an object that can be used to incrementally compute the
502 504 revision numbers of the ancestors of arbitrary sets that are not
503 505 ancestors of common. This is an ancestor.incrementalmissingancestors
504 506 object.
505 507
506 508 'common' is a list of revision numbers. If common is not supplied, uses
507 509 nullrev.
508 510 """
509 511 if common is None:
510 512 common = [nullrev]
511 513
512 514 return ancestor.incrementalmissingancestors(self.parentrevs, common)
513 515
514 516 def findmissingrevs(self, common=None, heads=None):
515 517 """Return the revision numbers of the ancestors of heads that
516 518 are not ancestors of common.
517 519
518 520 More specifically, return a list of revision numbers corresponding to
519 521 nodes N such that every N satisfies the following constraints:
520 522
521 523 1. N is an ancestor of some node in 'heads'
522 524 2. N is not an ancestor of any node in 'common'
523 525
524 526 The list is sorted by revision number, meaning it is
525 527 topologically sorted.
526 528
527 529 'heads' and 'common' are both lists of revision numbers. If heads is
528 530 not supplied, uses all of the revlog's heads. If common is not
529 531 supplied, uses nullid."""
530 532 if common is None:
531 533 common = [nullrev]
532 534 if heads is None:
533 535 heads = self.headrevs()
534 536
535 537 inc = self.incrementalmissingrevs(common=common)
536 538 return inc.missingancestors(heads)
537 539
538 540 def findmissing(self, common=None, heads=None):
539 541 """Return the ancestors of heads that are not ancestors of common.
540 542
541 543 More specifically, return a list of nodes N such that every N
542 544 satisfies the following constraints:
543 545
544 546 1. N is an ancestor of some node in 'heads'
545 547 2. N is not an ancestor of any node in 'common'
546 548
547 549 The list is sorted by revision number, meaning it is
548 550 topologically sorted.
549 551
550 552 'heads' and 'common' are both lists of node IDs. If heads is
551 553 not supplied, uses all of the revlog's heads. If common is not
552 554 supplied, uses nullid."""
553 555 if common is None:
554 556 common = [nullid]
555 557 if heads is None:
556 558 heads = self.heads()
557 559
558 560 common = [self.rev(n) for n in common]
559 561 heads = [self.rev(n) for n in heads]
560 562
561 563 inc = self.incrementalmissingrevs(common=common)
562 564 return [self.node(r) for r in inc.missingancestors(heads)]
563 565
564 566 def nodesbetween(self, roots=None, heads=None):
565 567 """Return a topological path from 'roots' to 'heads'.
566 568
567 569 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
568 570 topologically sorted list of all nodes N that satisfy both of
569 571 these constraints:
570 572
571 573 1. N is a descendant of some node in 'roots'
572 574 2. N is an ancestor of some node in 'heads'
573 575
574 576 Every node is considered to be both a descendant and an ancestor
575 577 of itself, so every reachable node in 'roots' and 'heads' will be
576 578 included in 'nodes'.
577 579
578 580 'outroots' is the list of reachable nodes in 'roots', i.e., the
579 581 subset of 'roots' that is returned in 'nodes'. Likewise,
580 582 'outheads' is the subset of 'heads' that is also in 'nodes'.
581 583
582 584 'roots' and 'heads' are both lists of node IDs. If 'roots' is
583 585 unspecified, uses nullid as the only root. If 'heads' is
584 586 unspecified, uses list of all of the revlog's heads."""
585 587 nonodes = ([], [], [])
586 588 if roots is not None:
587 589 roots = list(roots)
588 590 if not roots:
589 591 return nonodes
590 592 lowestrev = min([self.rev(n) for n in roots])
591 593 else:
592 594 roots = [nullid] # Everybody's a descendant of nullid
593 595 lowestrev = nullrev
594 596 if (lowestrev == nullrev) and (heads is None):
595 597 # We want _all_ the nodes!
596 598 return ([self.node(r) for r in self], [nullid], list(self.heads()))
597 599 if heads is None:
598 600 # All nodes are ancestors, so the latest ancestor is the last
599 601 # node.
600 602 highestrev = len(self) - 1
601 603 # Set ancestors to None to signal that every node is an ancestor.
602 604 ancestors = None
603 605 # Set heads to an empty dictionary for later discovery of heads
604 606 heads = {}
605 607 else:
606 608 heads = list(heads)
607 609 if not heads:
608 610 return nonodes
609 611 ancestors = set()
610 612 # Turn heads into a dictionary so we can remove 'fake' heads.
611 613 # Also, later we will be using it to filter out the heads we can't
612 614 # find from roots.
613 615 heads = dict.fromkeys(heads, False)
614 616 # Start at the top and keep marking parents until we're done.
615 617 nodestotag = set(heads)
616 618 # Remember where the top was so we can use it as a limit later.
617 619 highestrev = max([self.rev(n) for n in nodestotag])
618 620 while nodestotag:
619 621 # grab a node to tag
620 622 n = nodestotag.pop()
621 623 # Never tag nullid
622 624 if n == nullid:
623 625 continue
624 626 # A node's revision number represents its place in a
625 627 # topologically sorted list of nodes.
626 628 r = self.rev(n)
627 629 if r >= lowestrev:
628 630 if n not in ancestors:
629 631 # If we are possibly a descendant of one of the roots
630 632 # and we haven't already been marked as an ancestor
631 633 ancestors.add(n) # Mark as ancestor
632 634 # Add non-nullid parents to list of nodes to tag.
633 635 nodestotag.update([p for p in self.parents(n) if
634 636 p != nullid])
635 637 elif n in heads: # We've seen it before, is it a fake head?
636 638 # So it is, real heads should not be the ancestors of
637 639 # any other heads.
638 640 heads.pop(n)
639 641 if not ancestors:
640 642 return nonodes
641 643 # Now that we have our set of ancestors, we want to remove any
642 644 # roots that are not ancestors.
643 645
644 646 # If one of the roots was nullid, everything is included anyway.
645 647 if lowestrev > nullrev:
646 648 # But, since we weren't, let's recompute the lowest rev to not
647 649 # include roots that aren't ancestors.
648 650
649 651 # Filter out roots that aren't ancestors of heads
650 652 roots = [n for n in roots if n in ancestors]
651 653 # Recompute the lowest revision
652 654 if roots:
653 655 lowestrev = min([self.rev(n) for n in roots])
654 656 else:
655 657 # No more roots? Return empty list
656 658 return nonodes
657 659 else:
658 660 # We are descending from nullid, and don't need to care about
659 661 # any other roots.
660 662 lowestrev = nullrev
661 663 roots = [nullid]
662 664 # Transform our roots list into a set.
663 665 descendants = set(roots)
664 666 # Also, keep the original roots so we can filter out roots that aren't
665 667 # 'real' roots (i.e. are descended from other roots).
666 668 roots = descendants.copy()
667 669 # Our topologically sorted list of output nodes.
668 670 orderedout = []
669 671 # Don't start at nullid since we don't want nullid in our output list,
670 672 # and if nullid shows up in descendants, empty parents will look like
671 673 # they're descendants.
672 674 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
673 675 n = self.node(r)
674 676 isdescendant = False
675 677 if lowestrev == nullrev: # Everybody is a descendant of nullid
676 678 isdescendant = True
677 679 elif n in descendants:
678 680 # n is already a descendant
679 681 isdescendant = True
680 682 # This check only needs to be done here because all the roots
681 683 # will start being marked as descendants before the loop.
682 684 if n in roots:
683 685 # If n was a root, check if it's a 'real' root.
684 686 p = tuple(self.parents(n))
685 687 # If any of its parents are descendants, it's not a root.
686 688 if (p[0] in descendants) or (p[1] in descendants):
687 689 roots.remove(n)
688 690 else:
689 691 p = tuple(self.parents(n))
690 692 # A node is a descendant if either of its parents is a
691 693 # descendant. (We seeded the dependents list with the roots
692 694 # up there, remember?)
693 695 if (p[0] in descendants) or (p[1] in descendants):
694 696 descendants.add(n)
695 697 isdescendant = True
696 698 if isdescendant and ((ancestors is None) or (n in ancestors)):
697 699 # Only include nodes that are both descendants and ancestors.
698 700 orderedout.append(n)
699 701 if (ancestors is not None) and (n in heads):
700 702 # We're trying to figure out which heads are reachable
701 703 # from roots.
702 704 # Mark this head as having been reached
703 705 heads[n] = True
704 706 elif ancestors is None:
705 707 # Otherwise, we're trying to discover the heads.
706 708 # Assume this is a head because if it isn't, the next step
707 709 # will eventually remove it.
708 710 heads[n] = True
709 711 # But, obviously its parents aren't.
710 712 for p in self.parents(n):
711 713 heads.pop(p, None)
712 714 heads = [n for n, flag in heads.iteritems() if flag]
713 715 roots = list(roots)
714 716 assert orderedout
715 717 assert roots
716 718 assert heads
717 719 return (orderedout, roots, heads)
718 720
719 721 def headrevs(self):
720 722 try:
721 723 return self.index.headrevs()
722 724 except AttributeError:
723 725 return self._headrevs()
724 726
725 727 def _headrevs(self):
726 728 count = len(self)
727 729 if not count:
728 730 return [nullrev]
729 731 # we won't iterate over filtered revs so nobody is a head at start
730 732 ishead = [0] * (count + 1)
731 733 index = self.index
732 734 for r in self:
733 735 ishead[r] = 1 # I may be a head
734 736 e = index[r]
735 737 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
736 738 return [r for r, val in enumerate(ishead) if val]
737 739
738 740 def heads(self, start=None, stop=None):
739 741 """return the list of all nodes that have no children
740 742
741 743 if start is specified, only heads that are descendants of
742 744 start will be returned
743 745 if stop is specified, it will consider all the revs from stop
744 746 as if they had no children
745 747 """
746 748 if start is None and stop is None:
747 749 if not len(self):
748 750 return [nullid]
749 751 return [self.node(r) for r in self.headrevs()]
750 752
751 753 if start is None:
752 754 start = nullid
753 755 if stop is None:
754 756 stop = []
755 757 stoprevs = set([self.rev(n) for n in stop])
756 758 startrev = self.rev(start)
757 759 reachable = set((startrev,))
758 760 heads = set((startrev,))
759 761
760 762 parentrevs = self.parentrevs
761 763 for r in self.revs(start=startrev + 1):
762 764 for p in parentrevs(r):
763 765 if p in reachable:
764 766 if r not in stoprevs:
765 767 reachable.add(r)
766 768 heads.add(r)
767 769 if p in heads and p not in stoprevs:
768 770 heads.remove(p)
769 771
770 772 return [self.node(r) for r in heads]
771 773
772 774 def children(self, node):
773 775 """find the children of a given node"""
774 776 c = []
775 777 p = self.rev(node)
776 778 for r in self.revs(start=p + 1):
777 779 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
778 780 if prevs:
779 781 for pr in prevs:
780 782 if pr == p:
781 783 c.append(self.node(r))
782 784 elif p == nullrev:
783 785 c.append(self.node(r))
784 786 return c
785 787
786 788 def descendant(self, start, end):
787 789 if start == nullrev:
788 790 return True
789 791 for i in self.descendants([start]):
790 792 if i == end:
791 793 return True
792 794 elif i > end:
793 795 break
794 796 return False
795 797
796 798 def commonancestorsheads(self, a, b):
797 799 """calculate all the heads of the common ancestors of nodes a and b"""
798 800 a, b = self.rev(a), self.rev(b)
799 801 try:
800 802 ancs = self.index.commonancestorsheads(a, b)
801 803 except (AttributeError, OverflowError): # C implementation failed
802 804 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
803 805 return map(self.node, ancs)
804 806
805 807 def isancestor(self, a, b):
806 808 """return True if node a is an ancestor of node b
807 809
808 810 The implementation of this is trivial but the use of
809 811 commonancestorsheads is not."""
810 812 return a in self.commonancestorsheads(a, b)
811 813
812 814 def ancestor(self, a, b):
813 815 """calculate the "best" common ancestor of nodes a and b"""
814 816
815 817 a, b = self.rev(a), self.rev(b)
816 818 try:
817 819 ancs = self.index.ancestors(a, b)
818 820 except (AttributeError, OverflowError):
819 821 ancs = ancestor.ancestors(self.parentrevs, a, b)
820 822 if ancs:
821 823 # choose a consistent winner when there's a tie
822 824 return min(map(self.node, ancs))
823 825 return nullid
824 826
825 827 def _match(self, id):
826 828 if isinstance(id, int):
827 829 # rev
828 830 return self.node(id)
829 831 if len(id) == 20:
830 832 # possibly a binary node
831 833 # odds of a binary node being all hex in ASCII are 1 in 10**25
832 834 try:
833 835 node = id
834 836 self.rev(node) # quick search the index
835 837 return node
836 838 except LookupError:
837 839 pass # may be partial hex id
838 840 try:
839 841 # str(rev)
840 842 rev = int(id)
841 843 if str(rev) != id:
842 844 raise ValueError
843 845 if rev < 0:
844 846 rev = len(self) + rev
845 847 if rev < 0 or rev >= len(self):
846 848 raise ValueError
847 849 return self.node(rev)
848 850 except (ValueError, OverflowError):
849 851 pass
850 852 if len(id) == 40:
851 853 try:
852 854 # a full hex nodeid?
853 855 node = bin(id)
854 856 self.rev(node)
855 857 return node
856 858 except (TypeError, LookupError):
857 859 pass
858 860
859 861 def _partialmatch(self, id):
860 862 try:
861 863 n = self.index.partialmatch(id)
862 864 if n and self.hasnode(n):
863 865 return n
864 866 return None
865 867 except RevlogError:
866 868 # parsers.c radix tree lookup gave multiple matches
867 869 # fall through to slow path that filters hidden revisions
868 870 pass
869 871 except (AttributeError, ValueError):
870 872 # we are pure python, or key was too short to search radix tree
871 873 pass
872 874
873 875 if id in self._pcache:
874 876 return self._pcache[id]
875 877
876 878 if len(id) < 40:
877 879 try:
878 880 # hex(node)[:...]
879 881 l = len(id) // 2 # grab an even number of digits
880 882 prefix = bin(id[:l * 2])
881 883 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
882 884 nl = [n for n in nl if hex(n).startswith(id) and
883 885 self.hasnode(n)]
884 886 if len(nl) > 0:
885 887 if len(nl) == 1:
886 888 self._pcache[id] = nl[0]
887 889 return nl[0]
888 890 raise LookupError(id, self.indexfile,
889 891 _('ambiguous identifier'))
890 892 return None
891 893 except TypeError:
892 894 pass
893 895
894 896 def lookup(self, id):
895 897 """locate a node based on:
896 898 - revision number or str(revision number)
897 899 - nodeid or subset of hex nodeid
898 900 """
899 901 n = self._match(id)
900 902 if n is not None:
901 903 return n
902 904 n = self._partialmatch(id)
903 905 if n:
904 906 return n
905 907
906 908 raise LookupError(id, self.indexfile, _('no match found'))
907 909
908 910 def cmp(self, node, text):
909 911 """compare text with a given file revision
910 912
911 913 returns True if text is different than what is stored.
912 914 """
913 915 p1, p2 = self.parents(node)
914 916 return hash(text, p1, p2) != node
915 917
916 918 def _addchunk(self, offset, data):
917 919 o, d = self._chunkcache
918 920 # try to add to existing cache
919 921 if o + len(d) == offset and len(d) + len(data) < _chunksize:
920 922 self._chunkcache = o, d + data
921 923 else:
922 924 self._chunkcache = offset, data
923 925
924 926 def _loadchunk(self, offset, length):
925 927 if self._inline:
926 928 df = self.opener(self.indexfile)
927 929 else:
928 930 df = self.opener(self.datafile)
929 931
930 932 # Cache data both forward and backward around the requested
931 933 # data, in a fixed size window. This helps speed up operations
932 934 # involving reading the revlog backwards.
933 935 cachesize = self._chunkcachesize
934 936 realoffset = offset & ~(cachesize - 1)
935 937 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
936 938 - realoffset)
937 939 df.seek(realoffset)
938 940 d = df.read(reallength)
939 941 df.close()
940 942 self._addchunk(realoffset, d)
941 943 if offset != realoffset or reallength != length:
942 944 return util.buffer(d, offset - realoffset, length)
943 945 return d
944 946
945 947 def _getchunk(self, offset, length):
946 948 o, d = self._chunkcache
947 949 l = len(d)
948 950
949 951 # is it in the cache?
950 952 cachestart = offset - o
951 953 cacheend = cachestart + length
952 954 if cachestart >= 0 and cacheend <= l:
953 955 if cachestart == 0 and cacheend == l:
954 956 return d # avoid a copy
955 957 return util.buffer(d, cachestart, cacheend - cachestart)
956 958
957 959 return self._loadchunk(offset, length)
958 960
959 961 def _chunkraw(self, startrev, endrev):
960 962 start = self.start(startrev)
961 963 end = self.end(endrev)
962 964 if self._inline:
963 965 start += (startrev + 1) * self._io.size
964 966 end += (endrev + 1) * self._io.size
965 967 length = end - start
966 968 return self._getchunk(start, length)
967 969
968 970 def _chunk(self, rev):
969 971 return decompress(self._chunkraw(rev, rev))
970 972
971 973 def _chunks(self, revs):
972 974 '''faster version of [self._chunk(rev) for rev in revs]
973 975
974 976 Assumes that revs is in ascending order.'''
975 977 if not revs:
976 978 return []
977 979 start = self.start
978 980 length = self.length
979 981 inline = self._inline
980 982 iosize = self._io.size
981 983 buffer = util.buffer
982 984
983 985 l = []
984 986 ladd = l.append
985 987
986 988 # preload the cache
987 989 try:
988 990 while True:
989 991 # ensure that the cache doesn't change out from under us
990 992 _cache = self._chunkcache
991 993 self._chunkraw(revs[0], revs[-1])
992 994 if _cache == self._chunkcache:
993 995 break
994 996 offset, data = _cache
995 997 except OverflowError:
996 998 # issue4215 - we can't cache a run of chunks greater than
997 999 # 2G on Windows
998 1000 return [self._chunk(rev) for rev in revs]
999 1001
1000 1002 for rev in revs:
1001 1003 chunkstart = start(rev)
1002 1004 if inline:
1003 1005 chunkstart += (rev + 1) * iosize
1004 1006 chunklength = length(rev)
1005 1007 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1006 1008
1007 1009 return l
1008 1010
1009 1011 def _chunkclear(self):
1010 1012 self._chunkcache = (0, '')
1011 1013
1012 1014 def deltaparent(self, rev):
1013 1015 """return deltaparent of the given revision"""
1014 1016 base = self.index[rev][3]
1015 1017 if base == rev:
1016 1018 return nullrev
1017 1019 elif self._generaldelta:
1018 1020 return base
1019 1021 else:
1020 1022 return rev - 1
1021 1023
1022 1024 def revdiff(self, rev1, rev2):
1023 1025 """return or calculate a delta between two revisions"""
1024 1026 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1025 1027 return str(self._chunk(rev2))
1026 1028
1027 1029 return mdiff.textdiff(self.revision(rev1),
1028 1030 self.revision(rev2))
1029 1031
1030 1032 def revision(self, nodeorrev):
1031 1033 """return an uncompressed revision of a given node or revision
1032 1034 number.
1033 1035 """
1034 1036 if isinstance(nodeorrev, int):
1035 1037 rev = nodeorrev
1036 1038 node = self.node(rev)
1037 1039 else:
1038 1040 node = nodeorrev
1039 1041 rev = None
1040 1042
1041 1043 _cache = self._cache # grab local copy of cache to avoid thread race
1042 1044 cachedrev = None
1043 1045 if node == nullid:
1044 1046 return ""
1045 1047 if _cache:
1046 1048 if _cache[0] == node:
1047 1049 return _cache[2]
1048 1050 cachedrev = _cache[1]
1049 1051
1050 1052 # look up what we need to read
1051 1053 text = None
1052 1054 if rev is None:
1053 1055 rev = self.rev(node)
1054 1056
1055 1057 # check rev flags
1056 1058 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1057 1059 raise RevlogError(_('incompatible revision flag %x') %
1058 1060 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1059 1061
1060 1062 # build delta chain
1061 1063 chain = []
1062 1064 index = self.index # for performance
1063 1065 generaldelta = self._generaldelta
1064 1066 iterrev = rev
1065 1067 e = index[iterrev]
1066 1068 while iterrev != e[3] and iterrev != cachedrev:
1067 1069 chain.append(iterrev)
1068 1070 if generaldelta:
1069 1071 iterrev = e[3]
1070 1072 else:
1071 1073 iterrev -= 1
1072 1074 e = index[iterrev]
1073 1075
1074 1076 if iterrev == cachedrev:
1075 1077 # cache hit
1076 1078 text = _cache[2]
1077 1079 else:
1078 1080 chain.append(iterrev)
1079 1081 chain.reverse()
1080 1082
1081 1083 # drop cache to save memory
1082 1084 self._cache = None
1083 1085
1084 1086 bins = self._chunks(chain)
1085 1087 if text is None:
1086 1088 text = str(bins[0])
1087 1089 bins = bins[1:]
1088 1090
1089 1091 text = mdiff.patches(text, bins)
1090 1092
1091 1093 text = self._checkhash(text, node, rev)
1092 1094
1093 1095 self._cache = (node, rev, text)
1094 1096 return text
1095 1097
1096 1098 def hash(self, text, p1, p2):
1097 1099 """Compute a node hash.
1098 1100
1099 1101 Available as a function so that subclasses can replace the hash
1100 1102 as needed.
1101 1103 """
1102 1104 return hash(text, p1, p2)
1103 1105
1104 1106 def _checkhash(self, text, node, rev):
1105 1107 p1, p2 = self.parents(node)
1106 1108 self.checkhash(text, p1, p2, node, rev)
1107 1109 return text
1108 1110
1109 1111 def checkhash(self, text, p1, p2, node, rev=None):
1110 1112 if node != self.hash(text, p1, p2):
1111 1113 revornode = rev
1112 1114 if revornode is None:
1113 1115 revornode = templatefilters.short(hex(node))
1114 1116 raise RevlogError(_("integrity check failed on %s:%s")
1115 1117 % (self.indexfile, revornode))
1116 1118
1117 1119 def checkinlinesize(self, tr, fp=None):
1118 1120 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1119 1121 return
1120 1122
1121 1123 trinfo = tr.find(self.indexfile)
1122 1124 if trinfo is None:
1123 1125 raise RevlogError(_("%s not found in the transaction")
1124 1126 % self.indexfile)
1125 1127
1126 1128 trindex = trinfo[2]
1127 1129 dataoff = self.start(trindex)
1128 1130
1129 1131 tr.add(self.datafile, dataoff)
1130 1132
1131 1133 if fp:
1132 1134 fp.flush()
1133 1135 fp.close()
1134 1136
1135 1137 df = self.opener(self.datafile, 'w')
1136 1138 try:
1137 1139 for r in self:
1138 1140 df.write(self._chunkraw(r, r))
1139 1141 finally:
1140 1142 df.close()
1141 1143
1142 1144 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1143 1145 self.version &= ~(REVLOGNGINLINEDATA)
1144 1146 self._inline = False
1145 1147 for i in self:
1146 1148 e = self._io.packentry(self.index[i], self.node, self.version, i)
1147 1149 fp.write(e)
1148 1150
1149 1151 # if we don't call close, the temp file will never replace the
1150 1152 # real index
1151 1153 fp.close()
1152 1154
1153 1155 tr.replace(self.indexfile, trindex * self._io.size)
1154 1156 self._chunkclear()
1155 1157
1156 1158 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1157 1159 node=None):
1158 1160 """add a revision to the log
1159 1161
1160 1162 text - the revision data to add
1161 1163 transaction - the transaction object used for rollback
1162 1164 link - the linkrev data to add
1163 1165 p1, p2 - the parent nodeids of the revision
1164 1166 cachedelta - an optional precomputed delta
1165 1167 node - nodeid of revision; typically node is not specified, and it is
1166 1168 computed by default as hash(text, p1, p2), however subclasses might
1167 1169 use different hashing method (and override checkhash() in such case)
1168 1170 """
1169 1171 if link == nullrev:
1170 1172 raise RevlogError(_("attempted to add linkrev -1 to %s")
1171 1173 % self.indexfile)
1172 1174 node = node or self.hash(text, p1, p2)
1173 1175 if node in self.nodemap:
1174 1176 return node
1175 1177
1176 1178 dfh = None
1177 1179 if not self._inline:
1178 1180 dfh = self.opener(self.datafile, "a")
1179 1181 ifh = self.opener(self.indexfile, "a+")
1180 1182 try:
1181 1183 return self._addrevision(node, text, transaction, link, p1, p2,
1182 1184 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1183 1185 finally:
1184 1186 if dfh:
1185 1187 dfh.close()
1186 1188 ifh.close()
1187 1189
1188 1190 def compress(self, text):
1189 1191 """ generate a possibly-compressed representation of text """
1190 1192 if not text:
1191 1193 return ("", text)
1192 1194 l = len(text)
1193 1195 bin = None
1194 1196 if l < 44:
1195 1197 pass
1196 1198 elif l > 1000000:
1197 1199 # zlib makes an internal copy, thus doubling memory usage for
1198 1200 # large files, so let's do this in pieces
1199 1201 z = zlib.compressobj()
1200 1202 p = []
1201 1203 pos = 0
1202 1204 while pos < l:
1203 1205 pos2 = pos + 2**20
1204 1206 p.append(z.compress(text[pos:pos2]))
1205 1207 pos = pos2
1206 1208 p.append(z.flush())
1207 1209 if sum(map(len, p)) < l:
1208 1210 bin = "".join(p)
1209 1211 else:
1210 1212 bin = _compress(text)
1211 1213 if bin is None or len(bin) > l:
1212 1214 if text[0] == '\0':
1213 1215 return ("", text)
1214 1216 return ('u', text)
1215 1217 return ("", bin)
1216 1218
1217 1219 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1218 1220 cachedelta, ifh, dfh):
1219 1221 """internal function to add revisions to the log
1220 1222
1221 1223 see addrevision for argument descriptions.
1222 1224 invariants:
1223 1225 - text is optional (can be None); if not set, cachedelta must be set.
1224 1226 if both are set, they must correspond to each other.
1225 1227 """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            try:
                self.checkhash(btext[0], p1, p2, node)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += l
            return dist, l, data, base, chainbase, chainlen, compresseddeltalen

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        chainlen = None
        offset = self.end(prev)
        d = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
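        # With generaldelta, a parent is preferred as the delta base,
        # since deltas against p1/p2 tend to be smaller than deltas
        # against an unrelated previous revision; the comparisons with
        # basecache[1] fall back to prev when both parents sit before the
        # cached chain base. Classic revlogs always delta against prev.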
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
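        # Worked example with illustrative numbers: for textlen = 1000,
        # the revision stays a delta only if the chain spans at most
        # dist <= 4000 bytes of the data file, the new delta is no larger
        # than the text itself, the chain's deltas total at most 2000
        # compressed bytes, and (if configured) the chain length stays
        # within self._maxchainlen; otherwise a full snapshot is stored.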
        if (d is None or dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
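        # the index list ends with a sentinel entry, so insert just
        # before it rather than appending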
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

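    # _writeentry below handles the two on-disk layouts: an inline revlog
    # interleaves each index entry with its data chunk in the .i file,
    # while a conventional revlog keeps the fixed-size entries in .i and
    # the chunks in a separate .d file; checkinlinesize() migrates an
    # inline revlog to the split layout once it grows too large.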
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """
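        # Each chunk names its own delta base: deltachunk() yields node,
        # p1, p2, cs (the changeset this revision is linked to),
        # deltabase and delta, and 'chain' feeds the previously added
        # node back into deltachunk() for the next request.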

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, REVIDX_DEFAULT_FLAGS,
                                          (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
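        # Illustrative example: in a linear revlog with linkrevs
        # [0, 1, 2, 3, 4] and minlink=3, the walk starts at head rev 4,
        # pops revs 4 and 3 (their linkrevs are >= minlink, so they leave
        # futurelargelinkrevs rather than being marked broken), and stops
        # with strippoint == 3 and brokenrevs empty.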
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

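        # slice to -1, not to the end: the final index entry is the
        # sentinel record and must survive the truncation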
        del self.index[rev:-1]

    def checksize(self):
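        """compare the expected and actual sizes of this revlog's files

        Returns a (dd, di) pair: dd is the difference between the data
        file's actual size and the size the index implies, and di is the
        number of bytes in the index file beyond the last complete entry;
        both are 0 when the files are consistent (or simply absent).
        """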
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res