repoview: fix 0L with pack/unpack for 2.4
Matt Mackall -
r22282:4092d12b default
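The change itself is small: in repoview.py the hidden-changeset cache is now packed and unpacked with the signed '>%ii' struct format instead of the unsigned '>%iI', and the cache hash is built with an explicit update() call; in revlog.py, rev() re-raises TypeError instead of treating it as a failed lookup. The "0L" in the title presumably refers to Python 2.4's struct returning long objects for unsigned format codes. Below is a minimal sketch (plain Python, not Mercurial code) of the signed round trip the cache now relies on.

```python
# Illustrative sketch only (not Mercurial code): round-trip a list of
# revision numbers with the signed '>%ii' format this change switches to.
# On Python 2.4 the unsigned 'I' code could hand back longs such as 0L
# from unpack, which is presumably what motivates the lowercase 'i'.
import struct

revs = [0, 3, 7, 42]                                  # example hidden revisions
data = struct.pack('>%ii' % len(revs), *revs)         # 4 bytes per revision
count = len(data) // 4
assert list(struct.unpack('>%ii' % count, data)) == revs
```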
@@ -1,328 +1,329 @@
1 1 # repoview.py - Filtered view of a localrepo object
2 2 #
3 3 # Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
4 4 # Logilab SA <contact@logilab.fr>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 import copy
10 10 import error
11 11 import phases
12 12 import util
13 13 import obsolete
14 14 import struct
15 15 import tags as tagsmod
16 16 from mercurial.i18n import _
17 17
18 18 def hideablerevs(repo):
19 19 """Revisions candidates to be hidden
20 20
21 21 This is a standalone function to help extensions to wrap it."""
22 22 return obsolete.getrevs(repo, 'obsolete')
23 23
24 24 def _getstaticblockers(repo):
25 25 """Cacheable revisions blocking hidden changesets from being filtered.
26 26
27 27 Additional non-cached hidden blockers are computed in _getdynamicblockers.
28 28 This is a standalone function to help extensions to wrap it."""
29 29 assert not repo.changelog.filteredrevs
30 30 hideable = hideablerevs(repo)
31 31 blockers = set()
32 32 if hideable:
33 33 # We use cl to avoid recursive lookup from repo[xxx]
34 34 cl = repo.changelog
35 35 firsthideable = min(hideable)
36 36 revs = cl.revs(start=firsthideable)
37 37 tofilter = repo.revs(
38 38 '(%ld) and children(%ld)', list(revs), list(hideable))
39 39 blockers.update([r for r in tofilter if r not in hideable])
40 40 return blockers
41 41
42 42 def _getdynamicblockers(repo):
43 43 """Non-cacheable revisions blocking hidden changesets from being filtered.
44 44
45 45 Get revisions that will block hidden changesets and are likely to change,
46 46 but unlikely to create hidden blockers. They won't be cached, so be careful
47 47 with adding additional computation."""
48 48
49 49 cl = repo.changelog
50 50 blockers = set()
51 51 blockers.update([par.rev() for par in repo[None].parents()])
52 52 blockers.update([cl.rev(bm) for bm in repo._bookmarks.values()])
53 53
54 54 tags = {}
55 55 tagsmod.readlocaltags(repo.ui, repo, tags, {})
56 56 if tags:
57 57 rev, nodemap = cl.rev, cl.nodemap
58 58 blockers.update(rev(t[0]) for t in tags.values() if t[0] in nodemap)
59 59 return blockers
60 60
61 61 cacheversion = 1
62 62 cachefile = 'cache/hidden'
63 63
64 64 def cachehash(repo, hideable):
65 65 """return sha1 hash of repository data to identify a valid cache.
66 66
67 67 We calculate a sha1 of repo heads and the content of the obsstore and write
68 68 it to the cache. Upon reading we can easily validate by checking the hash
69 69 against the stored one and discard the cache in case the hashes don't match.
70 70 """
71 h = util.sha1(''.join(repo.heads()))
71 h = util.sha1()
72 h.update(''.join(repo.heads()))
72 73 h.update(str(hash(frozenset(hideable))))
73 74 return h.digest()
74 75
75 76 def trywritehiddencache(repo, hideable, hidden):
76 77 """write cache of hidden changesets to disk
77 78
78 79 Will not write the cache if a wlock cannot be obtained lazily.
79 80 The cache consists of a head of 22byte:
80 81 2 byte version number of the cache
81 82 20 byte sha1 to validate the cache
82 83 n*4 byte hidden revs
83 84 """
84 85 wlock = fh = None
85 86 try:
86 87 try:
87 88 wlock = repo.wlock(wait=False)
88 89 # write cache to file
89 90 newhash = cachehash(repo, hideable)
90 91 sortedset = sorted(hidden)
91 data = struct.pack('>%iI' % len(sortedset), *sortedset)
92 data = struct.pack('>%ii' % len(sortedset), *sortedset)
92 93 fh = repo.vfs.open(cachefile, 'w+b', atomictemp=True)
93 94 fh.write(struct.pack(">H", cacheversion))
94 95 fh.write(newhash)
95 96 fh.write(data)
96 97 except (IOError, OSError):
97 98 repo.ui.debug('error writing hidden changesets cache')
98 99 except error.LockHeld:
99 100 repo.ui.debug('cannot obtain lock to write hidden changesets cache')
100 101 finally:
101 102 if fh:
102 103 fh.close()
103 104 if wlock:
104 105 wlock.release()
105 106
106 107 def tryreadcache(repo, hideable):
107 108 """read a cache if the cache exists and is valid, otherwise returns None."""
108 109 hidden = fh = None
109 110 try:
110 111 if repo.vfs.exists(cachefile):
111 112 fh = repo.vfs.open(cachefile, 'rb')
112 113 version, = struct.unpack(">H", fh.read(2))
113 114 oldhash = fh.read(20)
114 115 newhash = cachehash(repo, hideable)
115 116 if (cacheversion, oldhash) == (version, newhash):
116 117 # cache is valid, so we can start reading the hidden revs
117 118 data = fh.read()
118 119 count = len(data) / 4
119 hidden = frozenset(struct.unpack('>%iI' % count, data))
120 hidden = frozenset(struct.unpack('>%ii' % count, data))
120 121 return hidden
121 122 finally:
122 123 if fh:
123 124 fh.close()
124 125
125 126 def computehidden(repo):
126 127 """compute the set of hidden revision to filter
127 128
128 129 During most operation hidden should be filtered."""
129 130 assert not repo.changelog.filteredrevs
130 131
131 132 hidden = frozenset()
132 133 hideable = hideablerevs(repo)
133 134 if hideable:
134 135 cl = repo.changelog
135 136 hidden = tryreadcache(repo, hideable)
136 137 if hidden is None:
137 138 blocked = cl.ancestors(_getstaticblockers(repo), inclusive=True)
138 139 hidden = frozenset(r for r in hideable if r not in blocked)
139 140 trywritehiddencache(repo, hideable, hidden)
140 141 elif repo.ui.configbool('experimental', 'verifyhiddencache', True):
141 142 blocked = cl.ancestors(_getstaticblockers(repo), inclusive=True)
142 143 computed = frozenset(r for r in hideable if r not in blocked)
143 144 if computed != hidden:
144 145 trywritehiddencache(repo, hideable, computed)
145 146 repo.ui.warn(_('Cache inconsistency detected. Please ' +
146 147 'open an issue on http://bz.selenic.com.\n'))
147 148 hidden = computed
148 149
149 150 # check if we have wd parents, bookmarks or tags pointing to hidden
150 151 # changesets and remove those.
151 152 dynamic = hidden & _getdynamicblockers(repo)
152 153 if dynamic:
153 154 blocked = cl.ancestors(dynamic, inclusive=True)
154 155 hidden = frozenset(r for r in hidden if r not in blocked)
155 156 return hidden
156 157
157 158 def computeunserved(repo):
158 159 """compute the set of revision that should be filtered when used a server
159 160
160 161 Secret and hidden changeset should not pretend to be here."""
161 162 assert not repo.changelog.filteredrevs
162 163 # fast path in simple case to avoid impact of non optimised code
163 164 hiddens = filterrevs(repo, 'visible')
164 165 if phases.hassecret(repo):
165 166 cl = repo.changelog
166 167 secret = phases.secret
167 168 getphase = repo._phasecache.phase
168 169 first = min(cl.rev(n) for n in repo._phasecache.phaseroots[secret])
169 170 revs = cl.revs(start=first)
170 171 secrets = set(r for r in revs if getphase(repo, r) >= secret)
171 172 return frozenset(hiddens | secrets)
172 173 else:
173 174 return hiddens
174 175
175 176 def computemutable(repo):
176 177 """compute the set of revision that should be filtered when used a server
177 178
178 179 Secret and hidden changeset should not pretend to be here."""
179 180 assert not repo.changelog.filteredrevs
180 181 # fast check to avoid revset call on huge repo
181 182 if util.any(repo._phasecache.phaseroots[1:]):
182 183 getphase = repo._phasecache.phase
183 184 maymutable = filterrevs(repo, 'base')
184 185 return frozenset(r for r in maymutable if getphase(repo, r))
185 186 return frozenset()
186 187
187 188 def computeimpactable(repo):
188 189 """Everything impactable by mutable revision
189 190
190 191 The immutable filter still have some chance to get invalidated. This will
191 192 happen when:
192 193
193 194 - you garbage collect hidden changeset,
194 195 - public phase is moved backward,
195 196 - something is changed in the filtering (this could be fixed)
196 197
197 198 This filter out any mutable changeset and any public changeset that may be
198 199 impacted by something happening to a mutable revision.
199 200
200 201 This is achieved by filtered everything with a revision number egal or
201 202 higher than the first mutable changeset is filtered."""
202 203 assert not repo.changelog.filteredrevs
203 204 cl = repo.changelog
204 205 firstmutable = len(cl)
205 206 for roots in repo._phasecache.phaseroots[1:]:
206 207 if roots:
207 208 firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
208 209 # protect from nullrev root
209 210 firstmutable = max(0, firstmutable)
210 211 return frozenset(xrange(firstmutable, len(cl)))
211 212
212 213 # function to compute filtered set
213 214 #
214 215 # When adding a new filter you MUST update the table at:
215 216 # mercurial.branchmap.subsettable
216 217 # Otherwise your filter will have to recompute all its branches cache
217 218 # from scratch (very slow).
218 219 filtertable = {'visible': computehidden,
219 220 'served': computeunserved,
220 221 'immutable': computemutable,
221 222 'base': computeimpactable}
222 223
223 224 def filterrevs(repo, filtername):
224 225 """returns set of filtered revision for this filter name"""
225 226 if filtername not in repo.filteredrevcache:
226 227 func = filtertable[filtername]
227 228 repo.filteredrevcache[filtername] = func(repo.unfiltered())
228 229 return repo.filteredrevcache[filtername]
229 230
230 231 class repoview(object):
231 232 """Provide a read/write view of a repo through a filtered changelog
232 233
233 234 This object is used to access a filtered version of a repository without
234 235 altering the original repository object itself. We cannot alter the
235 236 original object for two main reasons:
236 237 - It prevents the use of a repo with multiple filters at the same time,
237 238 in particular when multiple threads are involved.
238 239 - It makes the scope of the filtering harder to control.
239 240
240 241 This object behaves very much like the original repository. All attribute
241 242 operations are done on the original repository:
242 243 - An access to `repoview.someattr` actually returns `repo.someattr`,
243 244 - A write to `repoview.someattr` actually sets the value of `repo.someattr`,
244 245 - A deletion of `repoview.someattr` actually drops `someattr`
245 246 from `repo.__dict__`.
246 247
247 248 The only exception is the `changelog` property. It is overridden to return
248 249 a (surface) copy of `repo.changelog` with some revisions filtered. The
249 250 `filtername` attribute of the view controls the revisions that need to be
250 251 filtered. (The fact that the changelog is copied is an implementation detail.)
251 252
252 253 Unlike attributes, this object intercepts all method calls. This means that
253 254 all methods are run on the `repoview` object with the filtered `changelog`
254 255 property. For this purpose the simple `repoview` class must be mixed with
255 256 the actual class of the repository. This ensures that the resulting
256 257 `repoview` object has the very same methods as the repo object. This
257 258 leads to the property below.
258 259
259 260 repoview.method() --> repo.__class__.method(repoview)
260 261
261 262 The inheritance has to be done dynamically because `repo` can be any
262 263 subclass of `localrepo`, e.g. `bundlerepo` or `statichttprepo`.
263 264 """
264 265
265 266 def __init__(self, repo, filtername):
266 267 object.__setattr__(self, '_unfilteredrepo', repo)
267 268 object.__setattr__(self, 'filtername', filtername)
268 269 object.__setattr__(self, '_clcachekey', None)
269 270 object.__setattr__(self, '_clcache', None)
270 271
271 272 # not a propertycache on purpose; we shall implement a proper cache later
272 273 @property
273 274 def changelog(self):
274 275 """return a filtered version of the changeset
275 276
276 277 this changelog must not be used for writing"""
277 278 # some cache may be implemented later
278 279 unfi = self._unfilteredrepo
279 280 unfichangelog = unfi.changelog
280 281 revs = filterrevs(unfi, self.filtername)
281 282 cl = self._clcache
282 283 newkey = (len(unfichangelog), unfichangelog.tip(), hash(revs))
283 284 if cl is not None:
284 285 # we need to check curkey too for some obscure reason.
285 286 # MQ test show a corruption of the underlying repo (in _clcache)
286 287 # without change in the cachekey.
287 288 oldfilter = cl.filteredrevs
288 289 try:
289 290 cl.filterrevs = () # disable filtering for tip
290 291 curkey = (len(cl), cl.tip(), hash(oldfilter))
291 292 finally:
292 293 cl.filteredrevs = oldfilter
293 294 if newkey != self._clcachekey or newkey != curkey:
294 295 cl = None
295 296 # could have been made None by the previous if
296 297 if cl is None:
297 298 cl = copy.copy(unfichangelog)
298 299 cl.filteredrevs = revs
299 300 object.__setattr__(self, '_clcache', cl)
300 301 object.__setattr__(self, '_clcachekey', newkey)
301 302 return cl
302 303
303 304 def unfiltered(self):
304 305 """Return an unfiltered version of a repo"""
305 306 return self._unfilteredrepo
306 307
307 308 def filtered(self, name):
308 309 """Return a filtered version of a repository"""
309 310 if name == self.filtername:
310 311 return self
311 312 return self.unfiltered().filtered(name)
312 313
313 314 # all attribute accesses are forwarded to the proxied repo
314 315 def __getattr__(self, attr):
315 316 return getattr(self._unfilteredrepo, attr)
316 317
317 318 def __setattr__(self, attr, value):
318 319 return setattr(self._unfilteredrepo, attr, value)
319 320
320 321 def __delattr__(self, attr):
321 322 return delattr(self._unfilteredrepo, attr)
322 323
323 324 # The `requirements` attribute is initialized during __init__. But
324 325 # __getattr__ won't be called as it also exists on the class. We need
325 326 # explicit forwarding to main repo here
326 327 @property
327 328 def requirements(self):
328 329 return self._unfilteredrepo.requirements
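As a reading aid for the cache format documented in trywritehiddencache above (2-byte version, 20-byte sha1, then n * 4-byte signed revision numbers), here is a self-contained sketch. It is an illustration under stated assumptions: hashlib and plain byte strings stand in for Mercurial's util.sha1 and vfs, and the helper names are hypothetical.

```python
# Hypothetical helpers mirroring the cache layout described above; this is a
# reading aid, not Mercurial's implementation.
import hashlib
import struct

def packhiddencache(version, heads, hideable, hidden):
    # hash the repo heads plus the hideable set, as cachehash() does above
    h = hashlib.sha1()
    h.update(b''.join(heads))
    h.update(str(hash(frozenset(hideable))).encode('ascii'))
    revs = sorted(hidden)
    return (struct.pack('>H', version)                 # 2-byte version
            + h.digest()                               # 20-byte sha1
            + struct.pack('>%ii' % len(revs), *revs))  # n * 4-byte revs

def unpackhiddencache(blob):
    version, = struct.unpack('>H', blob[:2])
    digest = blob[2:22]
    body = blob[22:]
    hidden = frozenset(struct.unpack('>%ii' % (len(body) // 4), body))
    return version, digest, hidden
```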
@@ -1,1448 +1,1450 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent node is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
78 78 def decompress(bin):
79 79 """ decompress the given input """
80 80 if not bin:
81 81 return bin
82 82 t = bin[0]
83 83 if t == '\0':
84 84 return bin
85 85 if t == 'x':
86 86 try:
87 87 return _decompress(bin)
88 88 except zlib.error, e:
89 89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 90 if t == 'u':
91 91 return bin[1:]
92 92 raise RevlogError(_("unknown compression type %r") % t)
93 93
94 94 # index v0:
95 95 # 4 bytes: offset
96 96 # 4 bytes: compressed length
97 97 # 4 bytes: base rev
98 98 # 4 bytes: link rev
99 99 # 32 bytes: parent 1 nodeid
100 100 # 32 bytes: parent 2 nodeid
101 101 # 32 bytes: nodeid
102 102 indexformatv0 = ">4l20s20s20s"
103 103 v0shaoffset = 56
104 104
105 105 class revlogoldio(object):
106 106 def __init__(self):
107 107 self.size = struct.calcsize(indexformatv0)
108 108
109 109 def parseindex(self, data, inline):
110 110 s = self.size
111 111 index = []
112 112 nodemap = {nullid: nullrev}
113 113 n = off = 0
114 114 l = len(data)
115 115 while off + s <= l:
116 116 cur = data[off:off + s]
117 117 off += s
118 118 e = _unpack(indexformatv0, cur)
119 119 # transform to revlogv1 format
120 120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 122 index.append(e2)
123 123 nodemap[e[6]] = n
124 124 n += 1
125 125
126 126 # add the magic null revision at -1
127 127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128 128
129 129 return index, nodemap, None
130 130
131 131 def packentry(self, entry, node, version, rev):
132 132 if gettype(entry[0]):
133 133 raise RevlogError(_("index entry flags need RevlogNG"))
134 134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 135 node(entry[5]), node(entry[6]), entry[7])
136 136 return _pack(indexformatv0, *e2)
137 137
138 138 # index ng:
139 139 # 6 bytes: offset
140 140 # 2 bytes: flags
141 141 # 4 bytes: compressed length
142 142 # 4 bytes: uncompressed length
143 143 # 4 bytes: base rev
144 144 # 4 bytes: link rev
145 145 # 4 bytes: parent 1 rev
146 146 # 4 bytes: parent 2 rev
147 147 # 32 bytes: nodeid
148 148 indexformatng = ">Qiiiiii20s12x"
149 149 ngshaoffset = 32
150 150 versionformat = ">I"
151 151
152 152 class revlogio(object):
153 153 def __init__(self):
154 154 self.size = struct.calcsize(indexformatng)
155 155
156 156 def parseindex(self, data, inline):
157 157 # call the C implementation to parse the index data
158 158 index, cache = parsers.parse_index2(data, inline)
159 159 return index, getattr(index, 'nodemap', None), cache
160 160
161 161 def packentry(self, entry, node, version, rev):
162 162 p = _pack(indexformatng, *entry)
163 163 if rev == 0:
164 164 p = _pack(versionformat, version) + p[4:]
165 165 return p
166 166
167 167 class revlog(object):
168 168 """
169 169 the underlying revision storage object
170 170
171 171 A revlog consists of two parts, an index and the revision data.
172 172
173 173 The index is a file with a fixed record size containing
174 174 information on each revision, including its nodeid (hash), the
175 175 nodeids of its parents, the position and offset of its data within
176 176 the data file, and the revision it's based on. Finally, each entry
177 177 contains a linkrev entry that can serve as a pointer to external
178 178 data.
179 179
180 180 The revision data itself is a linear collection of data chunks.
181 181 Each chunk represents a revision and is usually represented as a
182 182 delta against the previous chunk. To bound lookup time, runs of
183 183 deltas are limited to about 2 times the length of the original
184 184 version data. This makes retrieval of a version proportional to
185 185 its size, or O(1) relative to the number of revisions.
186 186
187 187 Both pieces of the revlog are written to in an append-only
188 188 fashion, which means we never need to rewrite a file to insert or
189 189 remove data, and can use some simple techniques to avoid the need
190 190 for locking while reading.
191 191 """
192 192 def __init__(self, opener, indexfile):
193 193 """
194 194 create a revlog object
195 195
196 196 opener is a function that abstracts the file opening operation
197 197 and can be used to implement COW semantics or the like.
198 198 """
199 199 self.indexfile = indexfile
200 200 self.datafile = indexfile[:-2] + ".d"
201 201 self.opener = opener
202 202 self._cache = None
203 203 self._basecache = None
204 204 self._chunkcache = (0, '')
205 205 self._chunkcachesize = 65536
206 206 self.index = []
207 207 self._pcache = {}
208 208 self._nodecache = {nullid: nullrev}
209 209 self._nodepos = None
210 210
211 211 v = REVLOG_DEFAULT_VERSION
212 212 opts = getattr(opener, 'options', None)
213 213 if opts is not None:
214 214 if 'revlogv1' in opts:
215 215 if 'generaldelta' in opts:
216 216 v |= REVLOGGENERALDELTA
217 217 else:
218 218 v = 0
219 219 if 'chunkcachesize' in opts:
220 220 self._chunkcachesize = opts['chunkcachesize']
221 221
222 222 if self._chunkcachesize <= 0:
223 223 raise RevlogError(_('revlog chunk cache size %r is not greater '
224 224 'than 0') % self._chunkcachesize)
225 225 elif self._chunkcachesize & (self._chunkcachesize - 1):
226 226 raise RevlogError(_('revlog chunk cache size %r is not a power '
227 227 'of 2') % self._chunkcachesize)
228 228
229 229 i = ''
230 230 self._initempty = True
231 231 try:
232 232 f = self.opener(self.indexfile)
233 233 i = f.read()
234 234 f.close()
235 235 if len(i) > 0:
236 236 v = struct.unpack(versionformat, i[:4])[0]
237 237 self._initempty = False
238 238 except IOError, inst:
239 239 if inst.errno != errno.ENOENT:
240 240 raise
241 241
242 242 self.version = v
243 243 self._inline = v & REVLOGNGINLINEDATA
244 244 self._generaldelta = v & REVLOGGENERALDELTA
245 245 flags = v & ~0xFFFF
246 246 fmt = v & 0xFFFF
247 247 if fmt == REVLOGV0 and flags:
248 248 raise RevlogError(_("index %s unknown flags %#04x for format v0")
249 249 % (self.indexfile, flags >> 16))
250 250 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
251 251 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
252 252 % (self.indexfile, flags >> 16))
253 253 elif fmt > REVLOGNG:
254 254 raise RevlogError(_("index %s unknown format %d")
255 255 % (self.indexfile, fmt))
256 256
257 257 self._io = revlogio()
258 258 if self.version == REVLOGV0:
259 259 self._io = revlogoldio()
260 260 try:
261 261 d = self._io.parseindex(i, self._inline)
262 262 except (ValueError, IndexError):
263 263 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
264 264 self.index, nodemap, self._chunkcache = d
265 265 if nodemap is not None:
266 266 self.nodemap = self._nodecache = nodemap
267 267 if not self._chunkcache:
268 268 self._chunkclear()
269 269
270 270 def tip(self):
271 271 return self.node(len(self.index) - 2)
272 272 def __len__(self):
273 273 return len(self.index) - 1
274 274 def __iter__(self):
275 275 return iter(xrange(len(self)))
276 276 def revs(self, start=0, stop=None):
277 277 """iterate over all rev in this revlog (from start to stop)"""
278 278 step = 1
279 279 if stop is not None:
280 280 if start > stop:
281 281 step = -1
282 282 stop += step
283 283 else:
284 284 stop = len(self)
285 285 return xrange(start, stop, step)
286 286
287 287 @util.propertycache
288 288 def nodemap(self):
289 289 self.rev(self.node(0))
290 290 return self._nodecache
291 291
292 292 def hasnode(self, node):
293 293 try:
294 294 self.rev(node)
295 295 return True
296 296 except KeyError:
297 297 return False
298 298
299 299 def clearcaches(self):
300 300 try:
301 301 self._nodecache.clearcaches()
302 302 except AttributeError:
303 303 self._nodecache = {nullid: nullrev}
304 304 self._nodepos = None
305 305
306 306 def rev(self, node):
307 307 try:
308 308 return self._nodecache[node]
309 except TypeError:
310 raise
309 311 except RevlogError:
310 312 # parsers.c radix tree lookup failed
311 313 raise LookupError(node, self.indexfile, _('no node'))
312 314 except KeyError:
313 315 # pure python cache lookup failed
314 316 n = self._nodecache
315 317 i = self.index
316 318 p = self._nodepos
317 319 if p is None:
318 320 p = len(i) - 2
319 321 for r in xrange(p, -1, -1):
320 322 v = i[r][7]
321 323 n[v] = r
322 324 if v == node:
323 325 self._nodepos = r - 1
324 326 return r
325 327 raise LookupError(node, self.indexfile, _('no node'))
326 328
327 329 def node(self, rev):
328 330 return self.index[rev][7]
329 331 def linkrev(self, rev):
330 332 return self.index[rev][4]
331 333 def parents(self, node):
332 334 i = self.index
333 335 d = i[self.rev(node)]
334 336 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
335 337 def parentrevs(self, rev):
336 338 return self.index[rev][5:7]
337 339 def start(self, rev):
338 340 return int(self.index[rev][0] >> 16)
339 341 def end(self, rev):
340 342 return self.start(rev) + self.length(rev)
341 343 def length(self, rev):
342 344 return self.index[rev][1]
343 345 def chainbase(self, rev):
344 346 index = self.index
345 347 base = index[rev][3]
346 348 while base != rev:
347 349 rev = base
348 350 base = index[rev][3]
349 351 return base
350 352 def flags(self, rev):
351 353 return self.index[rev][0] & 0xFFFF
352 354 def rawsize(self, rev):
353 355 """return the length of the uncompressed text for a given revision"""
354 356 l = self.index[rev][2]
355 357 if l >= 0:
356 358 return l
357 359
358 360 t = self.revision(self.node(rev))
359 361 return len(t)
360 362 size = rawsize
361 363
362 364 def ancestors(self, revs, stoprev=0, inclusive=False):
363 365 """Generate the ancestors of 'revs' in reverse topological order.
364 366 Does not generate revs lower than stoprev.
365 367
366 368 See the documentation for ancestor.lazyancestors for more details."""
367 369
368 370 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
369 371 inclusive=inclusive)
370 372
371 373 def descendants(self, revs):
372 374 """Generate the descendants of 'revs' in revision order.
373 375
374 376 Yield a sequence of revision numbers starting with a child of
375 377 some rev in revs, i.e., each revision is *not* considered a
376 378 descendant of itself. Results are ordered by revision number (a
377 379 topological sort)."""
378 380 first = min(revs)
379 381 if first == nullrev:
380 382 for i in self:
381 383 yield i
382 384 return
383 385
384 386 seen = set(revs)
385 387 for i in self.revs(start=first + 1):
386 388 for x in self.parentrevs(i):
387 389 if x != nullrev and x in seen:
388 390 seen.add(i)
389 391 yield i
390 392 break
391 393
392 394 def findcommonmissing(self, common=None, heads=None):
393 395 """Return a tuple of the ancestors of common and the ancestors of heads
394 396 that are not ancestors of common. In revset terminology, we return the
395 397 tuple:
396 398
397 399 ::common, (::heads) - (::common)
398 400
399 401 The list is sorted by revision number, meaning it is
400 402 topologically sorted.
401 403
402 404 'heads' and 'common' are both lists of node IDs. If heads is
403 405 not supplied, uses all of the revlog's heads. If common is not
404 406 supplied, uses nullid."""
405 407 if common is None:
406 408 common = [nullid]
407 409 if heads is None:
408 410 heads = self.heads()
409 411
410 412 common = [self.rev(n) for n in common]
411 413 heads = [self.rev(n) for n in heads]
412 414
413 415 # we want the ancestors, but inclusive
414 416 class lazyset(object):
415 417 def __init__(self, lazyvalues):
416 418 self.addedvalues = set()
417 419 self.lazyvalues = lazyvalues
418 420
419 421 def __contains__(self, value):
420 422 return value in self.addedvalues or value in self.lazyvalues
421 423
422 424 def __iter__(self):
423 425 added = self.addedvalues
424 426 for r in added:
425 427 yield r
426 428 for r in self.lazyvalues:
427 429 if not r in added:
428 430 yield r
429 431
430 432 def add(self, value):
431 433 self.addedvalues.add(value)
432 434
433 435 def update(self, values):
434 436 self.addedvalues.update(values)
435 437
436 438 has = lazyset(self.ancestors(common))
437 439 has.add(nullrev)
438 440 has.update(common)
439 441
440 442 # take all ancestors from heads that aren't in has
441 443 missing = set()
442 444 visit = util.deque(r for r in heads if r not in has)
443 445 while visit:
444 446 r = visit.popleft()
445 447 if r in missing:
446 448 continue
447 449 else:
448 450 missing.add(r)
449 451 for p in self.parentrevs(r):
450 452 if p not in has:
451 453 visit.append(p)
452 454 missing = list(missing)
453 455 missing.sort()
454 456 return has, [self.node(r) for r in missing]
455 457
456 458 def findmissingrevs(self, common=None, heads=None):
457 459 """Return the revision numbers of the ancestors of heads that
458 460 are not ancestors of common.
459 461
460 462 More specifically, return a list of revision numbers corresponding to
461 463 nodes N such that every N satisfies the following constraints:
462 464
463 465 1. N is an ancestor of some node in 'heads'
464 466 2. N is not an ancestor of any node in 'common'
465 467
466 468 The list is sorted by revision number, meaning it is
467 469 topologically sorted.
468 470
469 471 'heads' and 'common' are both lists of revision numbers. If heads is
470 472 not supplied, uses all of the revlog's heads. If common is not
471 473 supplied, uses nullid."""
472 474 if common is None:
473 475 common = [nullrev]
474 476 if heads is None:
475 477 heads = self.headrevs()
476 478
477 479 return ancestor.missingancestors(heads, common, self.parentrevs)
478 480
479 481 def findmissing(self, common=None, heads=None):
480 482 """Return the ancestors of heads that are not ancestors of common.
481 483
482 484 More specifically, return a list of nodes N such that every N
483 485 satisfies the following constraints:
484 486
485 487 1. N is an ancestor of some node in 'heads'
486 488 2. N is not an ancestor of any node in 'common'
487 489
488 490 The list is sorted by revision number, meaning it is
489 491 topologically sorted.
490 492
491 493 'heads' and 'common' are both lists of node IDs. If heads is
492 494 not supplied, uses all of the revlog's heads. If common is not
493 495 supplied, uses nullid."""
494 496 if common is None:
495 497 common = [nullid]
496 498 if heads is None:
497 499 heads = self.heads()
498 500
499 501 common = [self.rev(n) for n in common]
500 502 heads = [self.rev(n) for n in heads]
501 503
502 504 return [self.node(r) for r in
503 505 ancestor.missingancestors(heads, common, self.parentrevs)]
504 506
505 507 def nodesbetween(self, roots=None, heads=None):
506 508 """Return a topological path from 'roots' to 'heads'.
507 509
508 510 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
509 511 topologically sorted list of all nodes N that satisfy both of
510 512 these constraints:
511 513
512 514 1. N is a descendant of some node in 'roots'
513 515 2. N is an ancestor of some node in 'heads'
514 516
515 517 Every node is considered to be both a descendant and an ancestor
516 518 of itself, so every reachable node in 'roots' and 'heads' will be
517 519 included in 'nodes'.
518 520
519 521 'outroots' is the list of reachable nodes in 'roots', i.e., the
520 522 subset of 'roots' that is returned in 'nodes'. Likewise,
521 523 'outheads' is the subset of 'heads' that is also in 'nodes'.
522 524
523 525 'roots' and 'heads' are both lists of node IDs. If 'roots' is
524 526 unspecified, uses nullid as the only root. If 'heads' is
525 527 unspecified, uses list of all of the revlog's heads."""
526 528 nonodes = ([], [], [])
527 529 if roots is not None:
528 530 roots = list(roots)
529 531 if not roots:
530 532 return nonodes
531 533 lowestrev = min([self.rev(n) for n in roots])
532 534 else:
533 535 roots = [nullid] # Everybody's a descendant of nullid
534 536 lowestrev = nullrev
535 537 if (lowestrev == nullrev) and (heads is None):
536 538 # We want _all_ the nodes!
537 539 return ([self.node(r) for r in self], [nullid], list(self.heads()))
538 540 if heads is None:
539 541 # All nodes are ancestors, so the latest ancestor is the last
540 542 # node.
541 543 highestrev = len(self) - 1
542 544 # Set ancestors to None to signal that every node is an ancestor.
543 545 ancestors = None
544 546 # Set heads to an empty dictionary for later discovery of heads
545 547 heads = {}
546 548 else:
547 549 heads = list(heads)
548 550 if not heads:
549 551 return nonodes
550 552 ancestors = set()
551 553 # Turn heads into a dictionary so we can remove 'fake' heads.
552 554 # Also, later we will be using it to filter out the heads we can't
553 555 # find from roots.
554 556 heads = dict.fromkeys(heads, False)
555 557 # Start at the top and keep marking parents until we're done.
556 558 nodestotag = set(heads)
557 559 # Remember where the top was so we can use it as a limit later.
558 560 highestrev = max([self.rev(n) for n in nodestotag])
559 561 while nodestotag:
560 562 # grab a node to tag
561 563 n = nodestotag.pop()
562 564 # Never tag nullid
563 565 if n == nullid:
564 566 continue
565 567 # A node's revision number represents its place in a
566 568 # topologically sorted list of nodes.
567 569 r = self.rev(n)
568 570 if r >= lowestrev:
569 571 if n not in ancestors:
570 572 # If we are possibly a descendant of one of the roots
571 573 # and we haven't already been marked as an ancestor
572 574 ancestors.add(n) # Mark as ancestor
573 575 # Add non-nullid parents to list of nodes to tag.
574 576 nodestotag.update([p for p in self.parents(n) if
575 577 p != nullid])
576 578 elif n in heads: # We've seen it before, is it a fake head?
577 579 # So it is, real heads should not be the ancestors of
578 580 # any other heads.
579 581 heads.pop(n)
580 582 if not ancestors:
581 583 return nonodes
582 584 # Now that we have our set of ancestors, we want to remove any
583 585 # roots that are not ancestors.
584 586
585 587 # If one of the roots was nullid, everything is included anyway.
586 588 if lowestrev > nullrev:
587 589 # But, since we weren't, let's recompute the lowest rev to not
588 590 # include roots that aren't ancestors.
589 591
590 592 # Filter out roots that aren't ancestors of heads
591 593 roots = [n for n in roots if n in ancestors]
592 594 # Recompute the lowest revision
593 595 if roots:
594 596 lowestrev = min([self.rev(n) for n in roots])
595 597 else:
596 598 # No more roots? Return empty list
597 599 return nonodes
598 600 else:
599 601 # We are descending from nullid, and don't need to care about
600 602 # any other roots.
601 603 lowestrev = nullrev
602 604 roots = [nullid]
603 605 # Transform our roots list into a set.
604 606 descendants = set(roots)
605 607 # Also, keep the original roots so we can filter out roots that aren't
606 608 # 'real' roots (i.e. are descended from other roots).
607 609 roots = descendants.copy()
608 610 # Our topologically sorted list of output nodes.
609 611 orderedout = []
610 612 # Don't start at nullid since we don't want nullid in our output list,
611 613 # and if nullid shows up in descendants, empty parents will look like
612 614 # they're descendants.
613 615 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
614 616 n = self.node(r)
615 617 isdescendant = False
616 618 if lowestrev == nullrev: # Everybody is a descendant of nullid
617 619 isdescendant = True
618 620 elif n in descendants:
619 621 # n is already a descendant
620 622 isdescendant = True
621 623 # This check only needs to be done here because all the roots
622 624 # will start being marked as descendants before the loop.
623 625 if n in roots:
624 626 # If n was a root, check if it's a 'real' root.
625 627 p = tuple(self.parents(n))
626 628 # If any of its parents are descendants, it's not a root.
627 629 if (p[0] in descendants) or (p[1] in descendants):
628 630 roots.remove(n)
629 631 else:
630 632 p = tuple(self.parents(n))
631 633 # A node is a descendant if either of its parents are
632 634 # descendants. (We seeded the dependents list with the roots
633 635 # up there, remember?)
634 636 if (p[0] in descendants) or (p[1] in descendants):
635 637 descendants.add(n)
636 638 isdescendant = True
637 639 if isdescendant and ((ancestors is None) or (n in ancestors)):
638 640 # Only include nodes that are both descendants and ancestors.
639 641 orderedout.append(n)
640 642 if (ancestors is not None) and (n in heads):
641 643 # We're trying to figure out which heads are reachable
642 644 # from roots.
643 645 # Mark this head as having been reached
644 646 heads[n] = True
645 647 elif ancestors is None:
646 648 # Otherwise, we're trying to discover the heads.
647 649 # Assume this is a head because if it isn't, the next step
648 650 # will eventually remove it.
649 651 heads[n] = True
650 652 # But, obviously its parents aren't.
651 653 for p in self.parents(n):
652 654 heads.pop(p, None)
653 655 heads = [n for n, flag in heads.iteritems() if flag]
654 656 roots = list(roots)
655 657 assert orderedout
656 658 assert roots
657 659 assert heads
658 660 return (orderedout, roots, heads)
659 661
660 662 def headrevs(self):
661 663 try:
662 664 return self.index.headrevs()
663 665 except AttributeError:
664 666 return self._headrevs()
665 667
666 668 def _headrevs(self):
667 669 count = len(self)
668 670 if not count:
669 671 return [nullrev]
670 672 # we won't iter over filtered rev so nobody is a head at start
671 673 ishead = [0] * (count + 1)
672 674 index = self.index
673 675 for r in self:
674 676 ishead[r] = 1 # I may be a head
675 677 e = index[r]
676 678 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
677 679 return [r for r, val in enumerate(ishead) if val]
678 680
679 681 def heads(self, start=None, stop=None):
680 682 """return the list of all nodes that have no children
681 683
682 684 if start is specified, only heads that are descendants of
683 685 start will be returned
684 686 if stop is specified, it will consider all the revs from stop
685 687 as if they had no children
686 688 """
687 689 if start is None and stop is None:
688 690 if not len(self):
689 691 return [nullid]
690 692 return [self.node(r) for r in self.headrevs()]
691 693
692 694 if start is None:
693 695 start = nullid
694 696 if stop is None:
695 697 stop = []
696 698 stoprevs = set([self.rev(n) for n in stop])
697 699 startrev = self.rev(start)
698 700 reachable = set((startrev,))
699 701 heads = set((startrev,))
700 702
701 703 parentrevs = self.parentrevs
702 704 for r in self.revs(start=startrev + 1):
703 705 for p in parentrevs(r):
704 706 if p in reachable:
705 707 if r not in stoprevs:
706 708 reachable.add(r)
707 709 heads.add(r)
708 710 if p in heads and p not in stoprevs:
709 711 heads.remove(p)
710 712
711 713 return [self.node(r) for r in heads]
712 714
713 715 def children(self, node):
714 716 """find the children of a given node"""
715 717 c = []
716 718 p = self.rev(node)
717 719 for r in self.revs(start=p + 1):
718 720 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
719 721 if prevs:
720 722 for pr in prevs:
721 723 if pr == p:
722 724 c.append(self.node(r))
723 725 elif p == nullrev:
724 726 c.append(self.node(r))
725 727 return c
726 728
727 729 def descendant(self, start, end):
728 730 if start == nullrev:
729 731 return True
730 732 for i in self.descendants([start]):
731 733 if i == end:
732 734 return True
733 735 elif i > end:
734 736 break
735 737 return False
736 738
737 739 def commonancestorsheads(self, a, b):
738 740 """calculate all the heads of the common ancestors of nodes a and b"""
739 741 a, b = self.rev(a), self.rev(b)
740 742 try:
741 743 ancs = self.index.commonancestorsheads(a, b)
742 744 except (AttributeError, OverflowError): # C implementation failed
743 745 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
744 746 return map(self.node, ancs)
745 747
746 748 def ancestor(self, a, b):
747 749 """calculate the least common ancestor of nodes a and b"""
748 750
749 751 a, b = self.rev(a), self.rev(b)
750 752 try:
751 753 ancs = self.index.ancestors(a, b)
752 754 except (AttributeError, OverflowError):
753 755 ancs = ancestor.ancestors(self.parentrevs, a, b)
754 756 if ancs:
755 757 # choose a consistent winner when there's a tie
756 758 return min(map(self.node, ancs))
757 759 return nullid
758 760
759 761 def _match(self, id):
760 762 if isinstance(id, int):
761 763 # rev
762 764 return self.node(id)
763 765 if len(id) == 20:
764 766 # possibly a binary node
765 767 # odds of a binary node being all hex in ASCII are 1 in 10**25
766 768 try:
767 769 node = id
768 770 self.rev(node) # quick search the index
769 771 return node
770 772 except LookupError:
771 773 pass # may be partial hex id
772 774 try:
773 775 # str(rev)
774 776 rev = int(id)
775 777 if str(rev) != id:
776 778 raise ValueError
777 779 if rev < 0:
778 780 rev = len(self) + rev
779 781 if rev < 0 or rev >= len(self):
780 782 raise ValueError
781 783 return self.node(rev)
782 784 except (ValueError, OverflowError):
783 785 pass
784 786 if len(id) == 40:
785 787 try:
786 788 # a full hex nodeid?
787 789 node = bin(id)
788 790 self.rev(node)
789 791 return node
790 792 except (TypeError, LookupError):
791 793 pass
792 794
793 795 def _partialmatch(self, id):
794 796 try:
795 797 n = self.index.partialmatch(id)
796 798 if n and self.hasnode(n):
797 799 return n
798 800 return None
799 801 except RevlogError:
800 802 # parsers.c radix tree lookup gave multiple matches
801 803 # fall through to slow path that filters hidden revisions
802 804 pass
803 805 except (AttributeError, ValueError):
804 806 # we are pure python, or key was too short to search radix tree
805 807 pass
806 808
807 809 if id in self._pcache:
808 810 return self._pcache[id]
809 811
810 812 if len(id) < 40:
811 813 try:
812 814 # hex(node)[:...]
813 815 l = len(id) // 2 # grab an even number of digits
814 816 prefix = bin(id[:l * 2])
815 817 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
816 818 nl = [n for n in nl if hex(n).startswith(id) and
817 819 self.hasnode(n)]
818 820 if len(nl) > 0:
819 821 if len(nl) == 1:
820 822 self._pcache[id] = nl[0]
821 823 return nl[0]
822 824 raise LookupError(id, self.indexfile,
823 825 _('ambiguous identifier'))
824 826 return None
825 827 except TypeError:
826 828 pass
827 829
828 830 def lookup(self, id):
829 831 """locate a node based on:
830 832 - revision number or str(revision number)
831 833 - nodeid or subset of hex nodeid
832 834 """
833 835 n = self._match(id)
834 836 if n is not None:
835 837 return n
836 838 n = self._partialmatch(id)
837 839 if n:
838 840 return n
839 841
840 842 raise LookupError(id, self.indexfile, _('no match found'))
841 843
842 844 def cmp(self, node, text):
843 845 """compare text with a given file revision
844 846
845 847 returns True if text is different than what is stored.
846 848 """
847 849 p1, p2 = self.parents(node)
848 850 return hash(text, p1, p2) != node
849 851
850 852 def _addchunk(self, offset, data):
851 853 o, d = self._chunkcache
852 854 # try to add to existing cache
853 855 if o + len(d) == offset and len(d) + len(data) < _chunksize:
854 856 self._chunkcache = o, d + data
855 857 else:
856 858 self._chunkcache = offset, data
857 859
858 860 def _loadchunk(self, offset, length):
859 861 if self._inline:
860 862 df = self.opener(self.indexfile)
861 863 else:
862 864 df = self.opener(self.datafile)
863 865
864 866 # Cache data both forward and backward around the requested
865 867 # data, in a fixed size window. This helps speed up operations
866 868 # involving reading the revlog backwards.
867 869 cachesize = self._chunkcachesize
868 870 realoffset = offset & ~(cachesize - 1)
869 871 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
870 872 - realoffset)
871 873 df.seek(realoffset)
872 874 d = df.read(reallength)
873 875 df.close()
874 876 self._addchunk(realoffset, d)
875 877 if offset != realoffset or reallength != length:
876 878 return util.buffer(d, offset - realoffset, length)
877 879 return d
878 880
879 881 def _getchunk(self, offset, length):
880 882 o, d = self._chunkcache
881 883 l = len(d)
882 884
883 885 # is it in the cache?
884 886 cachestart = offset - o
885 887 cacheend = cachestart + length
886 888 if cachestart >= 0 and cacheend <= l:
887 889 if cachestart == 0 and cacheend == l:
888 890 return d # avoid a copy
889 891 return util.buffer(d, cachestart, cacheend - cachestart)
890 892
891 893 return self._loadchunk(offset, length)
892 894
893 895 def _chunkraw(self, startrev, endrev):
894 896 start = self.start(startrev)
895 897 end = self.end(endrev)
896 898 if self._inline:
897 899 start += (startrev + 1) * self._io.size
898 900 end += (endrev + 1) * self._io.size
899 901 length = end - start
900 902 return self._getchunk(start, length)
901 903
902 904 def _chunk(self, rev):
903 905 return decompress(self._chunkraw(rev, rev))
904 906
905 907 def _chunks(self, revs):
906 908 '''faster version of [self._chunk(rev) for rev in revs]
907 909
908 910 Assumes that revs is in ascending order.'''
909 911 if not revs:
910 912 return []
911 913 start = self.start
912 914 length = self.length
913 915 inline = self._inline
914 916 iosize = self._io.size
915 917 buffer = util.buffer
916 918
917 919 l = []
918 920 ladd = l.append
919 921
920 922 # preload the cache
921 923 try:
922 924 while True:
923 925 # ensure that the cache doesn't change out from under us
924 926 _cache = self._chunkcache
925 927 self._chunkraw(revs[0], revs[-1])
926 928 if _cache == self._chunkcache:
927 929 break
928 930 offset, data = _cache
929 931 except OverflowError:
930 932 # issue4215 - we can't cache a run of chunks greater than
931 933 # 2G on Windows
932 934 return [self._chunk(rev) for rev in revs]
933 935
934 936 for rev in revs:
935 937 chunkstart = start(rev)
936 938 if inline:
937 939 chunkstart += (rev + 1) * iosize
938 940 chunklength = length(rev)
939 941 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
940 942
941 943 return l
942 944
943 945 def _chunkclear(self):
944 946 self._chunkcache = (0, '')
945 947
946 948 def deltaparent(self, rev):
947 949 """return deltaparent of the given revision"""
948 950 base = self.index[rev][3]
949 951 if base == rev:
950 952 return nullrev
951 953 elif self._generaldelta:
952 954 return base
953 955 else:
954 956 return rev - 1
955 957
956 958 def revdiff(self, rev1, rev2):
957 959 """return or calculate a delta between two revisions"""
958 960 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
959 961 return str(self._chunk(rev2))
960 962
961 963 return mdiff.textdiff(self.revision(rev1),
962 964 self.revision(rev2))
963 965
964 966 def revision(self, nodeorrev):
965 967 """return an uncompressed revision of a given node or revision
966 968 number.
967 969 """
968 970 if isinstance(nodeorrev, int):
969 971 rev = nodeorrev
970 972 node = self.node(rev)
971 973 else:
972 974 node = nodeorrev
973 975 rev = None
974 976
975 977 _cache = self._cache # grab local copy of cache to avoid thread race
976 978 cachedrev = None
977 979 if node == nullid:
978 980 return ""
979 981 if _cache:
980 982 if _cache[0] == node:
981 983 return _cache[2]
982 984 cachedrev = _cache[1]
983 985
984 986 # look up what we need to read
985 987 text = None
986 988 if rev is None:
987 989 rev = self.rev(node)
988 990
989 991 # check rev flags
990 992 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
991 993 raise RevlogError(_('incompatible revision flag %x') %
992 994 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
993 995
994 996 # build delta chain
995 997 chain = []
996 998 index = self.index # for performance
997 999 generaldelta = self._generaldelta
998 1000 iterrev = rev
999 1001 e = index[iterrev]
1000 1002 while iterrev != e[3] and iterrev != cachedrev:
1001 1003 chain.append(iterrev)
1002 1004 if generaldelta:
1003 1005 iterrev = e[3]
1004 1006 else:
1005 1007 iterrev -= 1
1006 1008 e = index[iterrev]
1007 1009
1008 1010 if iterrev == cachedrev:
1009 1011 # cache hit
1010 1012 text = _cache[2]
1011 1013 else:
1012 1014 chain.append(iterrev)
1013 1015 chain.reverse()
1014 1016
1015 1017 # drop cache to save memory
1016 1018 self._cache = None
1017 1019
1018 1020 bins = self._chunks(chain)
1019 1021 if text is None:
1020 1022 text = str(bins[0])
1021 1023 bins = bins[1:]
1022 1024
1023 1025 text = mdiff.patches(text, bins)
1024 1026
1025 1027 text = self._checkhash(text, node, rev)
1026 1028
1027 1029 self._cache = (node, rev, text)
1028 1030 return text
1029 1031
1030 1032 def _checkhash(self, text, node, rev):
1031 1033 p1, p2 = self.parents(node)
1032 1034 self.checkhash(text, p1, p2, node, rev)
1033 1035 return text
1034 1036
1035 1037 def checkhash(self, text, p1, p2, node, rev=None):
1036 1038 if node != hash(text, p1, p2):
1037 1039 revornode = rev
1038 1040 if revornode is None:
1039 1041 revornode = templatefilters.short(hex(node))
1040 1042 raise RevlogError(_("integrity check failed on %s:%s")
1041 1043 % (self.indexfile, revornode))
1042 1044
1043 1045 def checkinlinesize(self, tr, fp=None):
1044 1046 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1045 1047 return
1046 1048
1047 1049 trinfo = tr.find(self.indexfile)
1048 1050 if trinfo is None:
1049 1051 raise RevlogError(_("%s not found in the transaction")
1050 1052 % self.indexfile)
1051 1053
1052 1054 trindex = trinfo[2]
1053 1055 dataoff = self.start(trindex)
1054 1056
1055 1057 tr.add(self.datafile, dataoff)
1056 1058
1057 1059 if fp:
1058 1060 fp.flush()
1059 1061 fp.close()
1060 1062
1061 1063 df = self.opener(self.datafile, 'w')
1062 1064 try:
1063 1065 for r in self:
1064 1066 df.write(self._chunkraw(r, r))
1065 1067 finally:
1066 1068 df.close()
1067 1069
1068 1070 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1069 1071 self.version &= ~(REVLOGNGINLINEDATA)
1070 1072 self._inline = False
1071 1073 for i in self:
1072 1074 e = self._io.packentry(self.index[i], self.node, self.version, i)
1073 1075 fp.write(e)
1074 1076
1075 1077 # if we don't call close, the temp file will never replace the
1076 1078 # real index
1077 1079 fp.close()
1078 1080
1079 1081 tr.replace(self.indexfile, trindex * self._io.size)
1080 1082 self._chunkclear()
1081 1083
1082 1084 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1083 1085 node=None):
1084 1086 """add a revision to the log
1085 1087
1086 1088 text - the revision data to add
1087 1089 transaction - the transaction object used for rollback
1088 1090 link - the linkrev data to add
1089 1091 p1, p2 - the parent nodeids of the revision
1090 1092 cachedelta - an optional precomputed delta
1091 1093 node - nodeid of revision; typically node is not specified, and it is
1092 1094 computed by default as hash(text, p1, p2), however subclasses might
1093 1095 use different hashing method (and override checkhash() in such case)
1094 1096 """
1095 1097 if link == nullrev:
1096 1098 raise RevlogError(_("attempted to add linkrev -1 to %s")
1097 1099 % self.indexfile)
1098 1100 node = node or hash(text, p1, p2)
1099 1101 if node in self.nodemap:
1100 1102 return node
1101 1103
1102 1104 dfh = None
1103 1105 if not self._inline:
1104 1106 dfh = self.opener(self.datafile, "a")
1105 1107 ifh = self.opener(self.indexfile, "a+")
1106 1108 try:
1107 1109 return self._addrevision(node, text, transaction, link, p1, p2,
1108 1110 cachedelta, ifh, dfh)
1109 1111 finally:
1110 1112 if dfh:
1111 1113 dfh.close()
1112 1114 ifh.close()
1113 1115
1114 1116 def compress(self, text):
1115 1117 """ generate a possibly-compressed representation of text """
1116 1118 if not text:
1117 1119 return ("", text)
1118 1120 l = len(text)
1119 1121 bin = None
1120 1122 if l < 44:
1121 1123 pass
1122 1124 elif l > 1000000:
1123 1125 # zlib makes an internal copy, thus doubling memory usage for
1124 1126 # large files, so lets do this in pieces
1125 1127 z = zlib.compressobj()
1126 1128 p = []
1127 1129 pos = 0
1128 1130 while pos < l:
1129 1131 pos2 = pos + 2**20
1130 1132 p.append(z.compress(text[pos:pos2]))
1131 1133 pos = pos2
1132 1134 p.append(z.flush())
1133 1135 if sum(map(len, p)) < l:
1134 1136 bin = "".join(p)
1135 1137 else:
1136 1138 bin = _compress(text)
1137 1139 if bin is None or len(bin) > l:
1138 1140 if text[0] == '\0':
1139 1141 return ("", text)
1140 1142 return ('u', text)
1141 1143 return ("", bin)
1142 1144
1143 1145 def _addrevision(self, node, text, transaction, link, p1, p2,
1144 1146 cachedelta, ifh, dfh):
1145 1147 """internal function to add revisions to the log
1146 1148
1147 1149 see addrevision for argument descriptions.
1148 1150 invariants:
1149 1151 - text is optional (can be None); if not set, cachedelta must be set.
1150 1152 if both are set, they must correspond to each other.
1151 1153 """
1152 1154 btext = [text]
1153 1155 def buildtext():
1154 1156 if btext[0] is not None:
1155 1157 return btext[0]
1156 1158 # flush any pending writes here so we can read it in revision
1157 1159 if dfh:
1158 1160 dfh.flush()
1159 1161 ifh.flush()
1160 1162 basetext = self.revision(self.node(cachedelta[0]))
1161 1163 btext[0] = mdiff.patch(basetext, cachedelta[1])
1162 1164 self.checkhash(btext[0], p1, p2, node)
1163 1165 return btext[0]
1164 1166
1165 1167 def builddelta(rev):
1166 1168 # can we use the cached delta?
1167 1169 if cachedelta and cachedelta[0] == rev:
1168 1170 delta = cachedelta[1]
1169 1171 else:
1170 1172 t = buildtext()
1171 1173 ptext = self.revision(self.node(rev))
1172 1174 delta = mdiff.textdiff(ptext, t)
1173 1175 data = self.compress(delta)
1174 1176 l = len(data[1]) + len(data[0])
1175 1177 if basecache[0] == rev:
1176 1178 chainbase = basecache[1]
1177 1179 else:
1178 1180 chainbase = self.chainbase(rev)
1179 1181 dist = l + offset - self.start(chainbase)
1180 1182 if self._generaldelta:
1181 1183 base = rev
1182 1184 else:
1183 1185 base = chainbase
1184 1186 return dist, l, data, base, chainbase
1185 1187
1186 1188 curr = len(self)
1187 1189 prev = curr - 1
1188 1190 base = chainbase = curr
1189 1191 offset = self.end(prev)
1190 1192 flags = 0
1191 1193 d = None
1192 1194 if self._basecache is None:
1193 1195 self._basecache = (prev, self.chainbase(prev))
1194 1196 basecache = self._basecache
1195 1197 p1r, p2r = self.rev(p1), self.rev(p2)
1196 1198
1197 1199 # should we try to build a delta?
1198 1200 if prev != nullrev:
1199 1201 if self._generaldelta:
1200 1202 if p1r >= basecache[1]:
1201 1203 d = builddelta(p1r)
1202 1204 elif p2r >= basecache[1]:
1203 1205 d = builddelta(p2r)
1204 1206 else:
1205 1207 d = builddelta(prev)
1206 1208 else:
1207 1209 d = builddelta(prev)
1208 1210 dist, l, data, base, chainbase = d
1209 1211
1210 1212 # full versions are inserted when the needed deltas
1211 1213 # become comparable to the uncompressed text
1212 1214 if text is None:
1213 1215 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1214 1216 cachedelta[1])
1215 1217 else:
1216 1218 textlen = len(text)
1217 1219 if d is None or dist > textlen * 2:
1218 1220 text = buildtext()
1219 1221 data = self.compress(text)
1220 1222 l = len(data[1]) + len(data[0])
1221 1223 base = chainbase = curr
1222 1224
1223 1225 e = (offset_type(offset, flags), l, textlen,
1224 1226 base, link, p1r, p2r, node)
1225 1227 self.index.insert(-1, e)
1226 1228 self.nodemap[node] = curr
1227 1229
1228 1230 entry = self._io.packentry(e, self.node, self.version, curr)
1229 1231 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1230 1232
1231 1233 if type(text) == str: # only accept immutable objects
1232 1234 self._cache = (node, curr, text)
1233 1235 self._basecache = (curr, chainbase)
1234 1236 return node
1235 1237
1236 1238 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1237 1239 curr = len(self) - 1
1238 1240 if not self._inline:
1239 1241 transaction.add(self.datafile, offset)
1240 1242 transaction.add(self.indexfile, curr * len(entry))
1241 1243 if data[0]:
1242 1244 dfh.write(data[0])
1243 1245 dfh.write(data[1])
1244 1246 dfh.flush()
1245 1247 ifh.write(entry)
1246 1248 else:
1247 1249 offset += curr * self._io.size
1248 1250 transaction.add(self.indexfile, offset, curr)
1249 1251 ifh.write(entry)
1250 1252 ifh.write(data[0])
1251 1253 ifh.write(data[1])
1252 1254 self.checkinlinesize(transaction, ifh)
1253 1255
1254 1256 def addgroup(self, bundle, linkmapper, transaction):
1255 1257 """
1256 1258 add a delta group
1257 1259
1258 1260 given a set of deltas, add them to the revision log. the
1259 1261 first delta is against its parent, which should be in our
1260 1262 log, the rest are against the previous delta.
1261 1263 """
1262 1264
1263 1265 # track the base of the current delta log
1264 1266 content = []
1265 1267 node = None
1266 1268
1267 1269 r = len(self)
1268 1270 end = 0
1269 1271 if r:
1270 1272 end = self.end(r - 1)
1271 1273 ifh = self.opener(self.indexfile, "a+")
1272 1274 isize = r * self._io.size
1273 1275 if self._inline:
1274 1276 transaction.add(self.indexfile, end + isize, r)
1275 1277 dfh = None
1276 1278 else:
1277 1279 transaction.add(self.indexfile, isize, r)
1278 1280 transaction.add(self.datafile, end)
1279 1281 dfh = self.opener(self.datafile, "a")
1280 1282
1281 1283 try:
1282 1284 # loop through our set of deltas
1283 1285 chain = None
1284 1286 while True:
1285 1287 chunkdata = bundle.deltachunk(chain)
1286 1288 if not chunkdata:
1287 1289 break
1288 1290 node = chunkdata['node']
1289 1291 p1 = chunkdata['p1']
1290 1292 p2 = chunkdata['p2']
1291 1293 cs = chunkdata['cs']
1292 1294 deltabase = chunkdata['deltabase']
1293 1295 delta = chunkdata['delta']
1294 1296
1295 1297 content.append(node)
1296 1298
1297 1299 link = linkmapper(cs)
1298 1300 if node in self.nodemap:
1299 1301 # this can happen if two branches make the same change
1300 1302 chain = node
1301 1303 continue
1302 1304
1303 1305 for p in (p1, p2):
1304 1306 if p not in self.nodemap:
1305 1307 raise LookupError(p, self.indexfile,
1306 1308 _('unknown parent'))
1307 1309
1308 1310 if deltabase not in self.nodemap:
1309 1311 raise LookupError(deltabase, self.indexfile,
1310 1312 _('unknown delta base'))
1311 1313
1312 1314 baserev = self.rev(deltabase)
1313 1315 chain = self._addrevision(node, None, transaction, link,
1314 1316 p1, p2, (baserev, delta), ifh, dfh)
1315 1317 if not dfh and not self._inline:
1316 1318 # addrevision switched from inline to conventional
1317 1319 # reopen the index
1318 1320 ifh.close()
1319 1321 dfh = self.opener(self.datafile, "a")
1320 1322 ifh = self.opener(self.indexfile, "a")
1321 1323 finally:
1322 1324 if dfh:
1323 1325 dfh.close()
1324 1326 ifh.close()
1325 1327
1326 1328 return content
1327 1329
1328 1330 def getstrippoint(self, minlink):
1329 1331 """find the minimum rev that must be stripped to strip the linkrev
1330 1332
1331 1333 Returns a tuple containing the minimum rev and a set of all revs that
1332 1334 have linkrevs that will be broken by this strip.
1333 1335 """
1334 1336 brokenrevs = set()
1335 1337 strippoint = len(self)
1336 1338
1337 1339 heads = {}
1338 1340 futurelargelinkrevs = set()
1339 1341 for head in self.headrevs():
1340 1342 headlinkrev = self.linkrev(head)
1341 1343 heads[head] = headlinkrev
1342 1344 if headlinkrev >= minlink:
1343 1345 futurelargelinkrevs.add(headlinkrev)
1344 1346
1345 1347 # This algorithm involves walking down the rev graph, starting at the
1346 1348 # heads. Since the revs are topologically sorted according to linkrev,
1347 1349 # once all head linkrevs are below the minlink, we know there are
1348 1350 # no more revs that could have a linkrev greater than minlink.
1349 1351 # So we can stop walking.
1350 1352 while futurelargelinkrevs:
1351 1353 strippoint -= 1
1352 1354 linkrev = heads.pop(strippoint)
1353 1355
1354 1356 if linkrev < minlink:
1355 1357 brokenrevs.add(strippoint)
1356 1358 else:
1357 1359 futurelargelinkrevs.remove(linkrev)
1358 1360
1359 1361 for p in self.parentrevs(strippoint):
1360 1362 if p != nullrev:
1361 1363 plinkrev = self.linkrev(p)
1362 1364 heads[p] = plinkrev
1363 1365 if plinkrev >= minlink:
1364 1366 futurelargelinkrevs.add(plinkrev)
1365 1367
1366 1368 return strippoint, brokenrevs
1367 1369
1368 1370 def strip(self, minlink, transaction):
1369 1371 """truncate the revlog on the first revision with a linkrev >= minlink
1370 1372
1371 1373 This function is called when we're stripping revision minlink and
1372 1374 its descendants from the repository.
1373 1375
1374 1376 We have to remove all revisions with linkrev >= minlink, because
1375 1377 the equivalent changelog revisions will be renumbered after the
1376 1378 strip.
1377 1379
1378 1380 So we truncate the revlog on the first of these revisions, and
1379 1381 trust that the caller has saved the revisions that shouldn't be
1380 1382 removed and that it'll re-add them after this truncation.
1381 1383 """
1382 1384 if len(self) == 0:
1383 1385 return
1384 1386
1385 1387 rev, _ = self.getstrippoint(minlink)
1386 1388 if rev == len(self):
1387 1389 return
1388 1390
1389 1391 # first truncate the files on disk
1390 1392 end = self.start(rev)
1391 1393 if not self._inline:
1392 1394 transaction.add(self.datafile, end)
1393 1395 end = rev * self._io.size
1394 1396 else:
1395 1397 end += rev * self._io.size
1396 1398
1397 1399 transaction.add(self.indexfile, end)
1398 1400
1399 1401 # then reset internal state in memory to forget those revisions
1400 1402 self._cache = None
1401 1403 self._chunkclear()
1402 1404 for x in xrange(rev, len(self)):
1403 1405 del self.nodemap[self.node(x)]
1404 1406
1405 1407 del self.index[rev:-1]
1406 1408
1407 1409 def checksize(self):
1408 1410 expected = 0
1409 1411 if len(self):
1410 1412 expected = max(0, self.end(len(self) - 1))
1411 1413
1412 1414 try:
1413 1415 f = self.opener(self.datafile)
1414 1416 f.seek(0, 2)
1415 1417 actual = f.tell()
1416 1418 f.close()
1417 1419 dd = actual - expected
1418 1420 except IOError, inst:
1419 1421 if inst.errno != errno.ENOENT:
1420 1422 raise
1421 1423 dd = 0
1422 1424
1423 1425 try:
1424 1426 f = self.opener(self.indexfile)
1425 1427 f.seek(0, 2)
1426 1428 actual = f.tell()
1427 1429 f.close()
1428 1430 s = self._io.size
1429 1431 i = max(0, actual // s)
1430 1432 di = actual - (i * s)
1431 1433 if self._inline:
1432 1434 databytes = 0
1433 1435 for r in self:
1434 1436 databytes += max(0, self.length(r))
1435 1437 dd = 0
1436 1438 di = actual - len(self) * s - databytes
1437 1439 except IOError, inst:
1438 1440 if inst.errno != errno.ENOENT:
1439 1441 raise
1440 1442 di = 0
1441 1443
1442 1444 return (dd, di)
1443 1445
1444 1446 def files(self):
1445 1447 res = [self.indexfile]
1446 1448 if not self._inline:
1447 1449 res.append(self.datafile)
1448 1450 return res
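The revlog.py hunk only adds a TypeError re-raise in rev(), but since the surrounding context documents the RevlogNG index layout, a small decoding sketch of that layout may help when reading the diff. The field order comes from the comment block above indexformatng; the function itself is hypothetical and is not how Mercurial parses the index (that happens in parsers.c / revlogio.parseindex).

```python
# Hypothetical reading aid: decode one RevlogNG index entry with the
# ">Qiiiiii20s12x" layout documented above.  Note that in the real file the
# first 4 bytes of entry 0 are overlaid with the version header (see
# revlogio.packentry).
import struct

indexformatng = ">Qiiiiii20s12x"
entrysize = struct.calcsize(indexformatng)   # 64 bytes per entry

def decodeentry(raw):
    (offset_flags, complen, uncomplen,
     baserev, linkrev, p1rev, p2rev, nodeid) = struct.unpack(indexformatng, raw)
    return {
        'offset': int(offset_flags >> 16),    # cf. getoffset()
        'flags': int(offset_flags & 0xFFFF),  # cf. gettype()
        'compressed length': complen,
        'uncompressed length': uncomplen,
        'base rev': baserev,
        'link rev': linkrev,
        'parent 1 rev': p1rev,
        'parent 2 rev': p2rev,
        'nodeid': nodeid,
    }
```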