repoview: fix 0L with pack/unpack for 2.4
Matt Mackall
r22282:4092d12b default
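The fix itself is small: on Python 2.4, struct.unpack returns Python longs for the unsigned 'I' format, so revision numbers read back from the hidden-revs cache came out as 0L instead of 0. The signed 'i' format yields plain ints on 2.4 as well, and revision numbers fit comfortably in a signed 32-bit int, so both the pack and unpack sites below switch from '>%iI' to '>%ii'. A minimal sketch of the difference (assuming a Python 2.x interpreter; the long-vs-int result depends on whether it is 2.4 or 2.5+):

    import struct

    packed = struct.pack('>I', 0)       # same four zero bytes either way
    struct.unpack('>I', packed)         # (0L,) on Python 2.4, (0,) on 2.5+
    struct.unpack('>i', packed)         # (0,) on both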
--- a/mercurial/repoview.py
+++ b/mercurial/repoview.py
@@ -1,328 +1,329 @@
 # repoview.py - Filtered view of a localrepo object
 #
 # Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
 #                Logilab SA        <contact@logilab.fr>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 import copy
 import error
 import phases
 import util
 import obsolete
 import struct
 import tags as tagsmod
 from mercurial.i18n import _

 def hideablerevs(repo):
     """Revisions candidates to be hidden

     This is a standalone function to help extensions to wrap it."""
     return obsolete.getrevs(repo, 'obsolete')

 def _getstaticblockers(repo):
     """Cacheable revisions blocking hidden changesets from being filtered.

     Additional non-cached hidden blockers are computed in _getdynamicblockers.
     This is a standalone function to help extensions to wrap it."""
     assert not repo.changelog.filteredrevs
     hideable = hideablerevs(repo)
     blockers = set()
     if hideable:
         # We use cl to avoid recursive lookup from repo[xxx]
         cl = repo.changelog
         firsthideable = min(hideable)
         revs = cl.revs(start=firsthideable)
         tofilter = repo.revs(
             '(%ld) and children(%ld)', list(revs), list(hideable))
         blockers.update([r for r in tofilter if r not in hideable])
     return blockers

 def _getdynamicblockers(repo):
     """Non-cacheable revisions blocking hidden changesets from being filtered.

     Get revisions that will block hidden changesets and are likely to change,
     but unlikely to create hidden blockers. They won't be cached, so be careful
     with adding additional computation."""

     cl = repo.changelog
     blockers = set()
     blockers.update([par.rev() for par in repo[None].parents()])
     blockers.update([cl.rev(bm) for bm in repo._bookmarks.values()])

     tags = {}
     tagsmod.readlocaltags(repo.ui, repo, tags, {})
     if tags:
         rev, nodemap = cl.rev, cl.nodemap
         blockers.update(rev(t[0]) for t in tags.values() if t[0] in nodemap)
     return blockers

 cacheversion = 1
 cachefile = 'cache/hidden'

 def cachehash(repo, hideable):
     """return sha1 hash of repository data to identify a valid cache.

     We calculate a sha1 of repo heads and the content of the obsstore and write
     it to the cache. Upon reading we can easily validate by checking the hash
     against the stored one and discard the cache in case the hashes don't match.
     """
-    h = util.sha1(''.join(repo.heads()))
+    h = util.sha1()
+    h.update(''.join(repo.heads()))
     h.update(str(hash(frozenset(hideable))))
     return h.digest()

 def trywritehiddencache(repo, hideable, hidden):
     """write cache of hidden changesets to disk

     Will not write the cache if a wlock cannot be obtained lazily.
     The cache consists of a head of 22byte:
        2 byte    version number of the cache
       20 byte    sha1 to validate the cache
        n*4 byte  hidden revs
     """
     wlock = fh = None
     try:
         try:
             wlock = repo.wlock(wait=False)
             # write cache to file
             newhash = cachehash(repo, hideable)
             sortedset = sorted(hidden)
-            data = struct.pack('>%iI' % len(sortedset), *sortedset)
+            data = struct.pack('>%ii' % len(sortedset), *sortedset)
             fh = repo.vfs.open(cachefile, 'w+b', atomictemp=True)
             fh.write(struct.pack(">H", cacheversion))
             fh.write(newhash)
             fh.write(data)
         except (IOError, OSError):
             repo.ui.debug('error writing hidden changesets cache')
         except error.LockHeld:
             repo.ui.debug('cannot obtain lock to write hidden changesets cache')
     finally:
         if fh:
             fh.close()
         if wlock:
             wlock.release()

 def tryreadcache(repo, hideable):
     """read a cache if the cache exists and is valid, otherwise returns None."""
     hidden = fh = None
     try:
         if repo.vfs.exists(cachefile):
             fh = repo.vfs.open(cachefile, 'rb')
             version, = struct.unpack(">H", fh.read(2))
             oldhash = fh.read(20)
             newhash = cachehash(repo, hideable)
             if (cacheversion, oldhash) == (version, newhash):
                 # cache is valid, so we can start reading the hidden revs
                 data = fh.read()
                 count = len(data) / 4
-                hidden = frozenset(struct.unpack('>%iI' % count, data))
+                hidden = frozenset(struct.unpack('>%ii' % count, data))
         return hidden
     finally:
         if fh:
             fh.close()

 def computehidden(repo):
     """compute the set of hidden revision to filter

     During most operation hidden should be filtered."""
     assert not repo.changelog.filteredrevs

     hidden = frozenset()
     hideable = hideablerevs(repo)
     if hideable:
         cl = repo.changelog
         hidden = tryreadcache(repo, hideable)
         if hidden is None:
             blocked = cl.ancestors(_getstaticblockers(repo), inclusive=True)
             hidden = frozenset(r for r in hideable if r not in blocked)
             trywritehiddencache(repo, hideable, hidden)
         elif repo.ui.configbool('experimental', 'verifyhiddencache', True):
             blocked = cl.ancestors(_getstaticblockers(repo), inclusive=True)
             computed = frozenset(r for r in hideable if r not in blocked)
             if computed != hidden:
                 trywritehiddencache(repo, hideable, computed)
                 repo.ui.warn(_('Cache inconsistency detected. Please ' +
                     'open an issue on http://bz.selenic.com.\n'))
                 hidden = computed

         # check if we have wd parents, bookmarks or tags pointing to hidden
         # changesets and remove those.
         dynamic = hidden & _getdynamicblockers(repo)
         if dynamic:
             blocked = cl.ancestors(dynamic, inclusive=True)
             hidden = frozenset(r for r in hidden if r not in blocked)
     return hidden

 def computeunserved(repo):
     """compute the set of revision that should be filtered when used a server

     Secret and hidden changeset should not pretend to be here."""
     assert not repo.changelog.filteredrevs
     # fast path in simple case to avoid impact of non optimised code
     hiddens = filterrevs(repo, 'visible')
     if phases.hassecret(repo):
         cl = repo.changelog
         secret = phases.secret
         getphase = repo._phasecache.phase
         first = min(cl.rev(n) for n in repo._phasecache.phaseroots[secret])
         revs = cl.revs(start=first)
         secrets = set(r for r in revs if getphase(repo, r) >= secret)
         return frozenset(hiddens | secrets)
     else:
         return hiddens

 def computemutable(repo):
     """compute the set of revision that should be filtered when used a server

     Secret and hidden changeset should not pretend to be here."""
     assert not repo.changelog.filteredrevs
     # fast check to avoid revset call on huge repo
     if util.any(repo._phasecache.phaseroots[1:]):
         getphase = repo._phasecache.phase
         maymutable = filterrevs(repo, 'base')
         return frozenset(r for r in maymutable if getphase(repo, r))
     return frozenset()

 def computeimpactable(repo):
     """Everything impactable by mutable revision

     The immutable filter still have some chance to get invalidated. This will
     happen when:

     - you garbage collect hidden changeset,
     - public phase is moved backward,
     - something is changed in the filtering (this could be fixed)

     This filter out any mutable changeset and any public changeset that may be
     impacted by something happening to a mutable revision.

     This is achieved by filtered everything with a revision number egal or
     higher than the first mutable changeset is filtered."""
     assert not repo.changelog.filteredrevs
     cl = repo.changelog
     firstmutable = len(cl)
     for roots in repo._phasecache.phaseroots[1:]:
         if roots:
             firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
     # protect from nullrev root
     firstmutable = max(0, firstmutable)
     return frozenset(xrange(firstmutable, len(cl)))

 # function to compute filtered set
 #
 # When adding a new filter you MUST update the table at:
 #     mercurial.branchmap.subsettable
 # Otherwise your filter will have to recompute all its branches cache
 # from scratch (very slow).
 filtertable = {'visible': computehidden,
                'served': computeunserved,
                'immutable': computemutable,
                'base': computeimpactable}

 def filterrevs(repo, filtername):
     """returns set of filtered revision for this filter name"""
     if filtername not in repo.filteredrevcache:
         func = filtertable[filtername]
         repo.filteredrevcache[filtername] = func(repo.unfiltered())
     return repo.filteredrevcache[filtername]

 class repoview(object):
     """Provide a read/write view of a repo through a filtered changelog

     This object is used to access a filtered version of a repository without
     altering the original repository object itself. We can not alter the
     original object for two main reasons:
     - It prevents the use of a repo with multiple filters at the same time. In
       particular when multiple threads are involved.
     - It makes scope of the filtering harder to control.

     This object behaves very closely to the original repository. All attribute
     operations are done on the original repository:
     - An access to `repoview.someattr` actually returns `repo.someattr`,
     - A write to `repoview.someattr` actually sets value of `repo.someattr`,
     - A deletion of `repoview.someattr` actually drops `someattr`
       from `repo.__dict__`.

     The only exception is the `changelog` property. It is overridden to return
     a (surface) copy of `repo.changelog` with some revisions filtered. The
     `filtername` attribute of the view control the revisions that need to be
     filtered. (the fact the changelog is copied is an implementation detail).

     Unlike attributes, this object intercepts all method calls. This means that
     all methods are run on the `repoview` object with the filtered `changelog`
     property. For this purpose the simple `repoview` class must be mixed with
     the actual class of the repository. This ensures that the resulting
     `repoview` object have the very same methods than the repo object. This
     leads to the property below.

         repoview.method() --> repo.__class__.method(repoview)

     The inheritance has to be done dynamically because `repo` can be of any
     subclasses of `localrepo`. Eg: `bundlerepo` or `statichttprepo`.
     """

     def __init__(self, repo, filtername):
         object.__setattr__(self, '_unfilteredrepo', repo)
         object.__setattr__(self, 'filtername', filtername)
         object.__setattr__(self, '_clcachekey', None)
         object.__setattr__(self, '_clcache', None)

     # not a propertycache on purpose we shall implement a proper cache later
     @property
     def changelog(self):
         """return a filtered version of the changeset

         this changelog must not be used for writing"""
         # some cache may be implemented later
         unfi = self._unfilteredrepo
         unfichangelog = unfi.changelog
         revs = filterrevs(unfi, self.filtername)
         cl = self._clcache
         newkey = (len(unfichangelog), unfichangelog.tip(), hash(revs))
         if cl is not None:
             # we need to check curkey too for some obscure reason.
             # MQ test show a corruption of the underlying repo (in _clcache)
             # without change in the cachekey.
             oldfilter = cl.filteredrevs
             try:
                 cl.filterrevs = ()  # disable filtering for tip
                 curkey = (len(cl), cl.tip(), hash(oldfilter))
             finally:
                 cl.filteredrevs = oldfilter
             if newkey != self._clcachekey or newkey != curkey:
                 cl = None
         # could have been made None by the previous if
         if cl is None:
             cl = copy.copy(unfichangelog)
             cl.filteredrevs = revs
             object.__setattr__(self, '_clcache', cl)
             object.__setattr__(self, '_clcachekey', newkey)
         return cl

     def unfiltered(self):
         """Return an unfiltered version of a repo"""
         return self._unfilteredrepo

     def filtered(self, name):
         """Return a filtered version of a repository"""
         if name == self.filtername:
             return self
         return self.unfiltered().filtered(name)

     # everything access are forwarded to the proxied repo
     def __getattr__(self, attr):
         return getattr(self._unfilteredrepo, attr)

     def __setattr__(self, attr, value):
         return setattr(self._unfilteredrepo, attr, value)

     def __delattr__(self, attr):
         return delattr(self._unfilteredrepo, attr)

     # The `requirements` attribute is initialized during __init__. But
     # __getattr__ won't be called as it also exists on the class. We need
     # explicit forwarding to main repo here
     @property
     def requirements(self):
         return self._unfilteredrepo.requirements
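As an aside, the cachehash hunk above is behavior-preserving for hashlib-style objects: util.sha1(data) and h = util.sha1() followed by h.update(data) produce the same digest. The on-disk layout documented in trywritehiddencache can also be exercised standalone; the sketch below is not Mercurial API, just the stdlib struct/hashlib calls the format implies (Python 2 syntax to match the code above, with a made-up revision list), mirroring the write side of trywritehiddencache and the read side of tryreadcache:

    import struct, hashlib

    cacheversion = 1
    revs = [0, 5, 9]                    # hypothetical hidden revisions

    # write side: 2-byte version, 20-byte sha1, then n*4-byte big-endian revs
    key = hashlib.sha1('repo heads + obsstore state').digest()
    blob = (struct.pack('>H', cacheversion) + key +
            struct.pack('>%ii' % len(revs), *revs))

    # read side: validate the 22-byte header, then unpack the revisions
    version, = struct.unpack('>H', blob[:2])
    storedkey = blob[2:22]
    data = blob[22:]
    assert (version, storedkey) == (cacheversion, key)
    hidden = frozenset(struct.unpack('>%ii' % (len(data) // 4), data))
    assert hidden == frozenset(revs)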
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,1448 +1,1450 @@
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev
15 from node import bin, hex, nullid, nullrev
16 from i18n import _
16 from i18n import _
17 import ancestor, mdiff, parsers, error, util, templatefilters
17 import ancestor, mdiff, parsers, error, util, templatefilters
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog header flags
26 # revlog header flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGGENERALDELTA = (1 << 17)
30 REVLOGGENERALDELTA = (1 << 17)
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35
35
36 # revlog index flags
36 # revlog index flags
37 REVIDX_KNOWN_FLAGS = 0
37 REVIDX_KNOWN_FLAGS = 0
38
38
39 # max size of revlog with inline data
39 # max size of revlog with inline data
40 _maxinline = 131072
40 _maxinline = 131072
41 _chunksize = 1048576
41 _chunksize = 1048576
42
42
43 RevlogError = error.RevlogError
43 RevlogError = error.RevlogError
44 LookupError = error.LookupError
44 LookupError = error.LookupError
45
45
46 def getoffset(q):
46 def getoffset(q):
47 return int(q >> 16)
47 return int(q >> 16)
48
48
49 def gettype(q):
49 def gettype(q):
50 return int(q & 0xFFFF)
50 return int(q & 0xFFFF)
51
51
52 def offset_type(offset, type):
52 def offset_type(offset, type):
53 return long(long(offset) << 16 | type)
53 return long(long(offset) << 16 | type)
54
54
55 nullhash = _sha(nullid)
55 nullhash = _sha(nullid)
56
56
57 def hash(text, p1, p2):
57 def hash(text, p1, p2):
58 """generate a hash from the given text and its parent hashes
58 """generate a hash from the given text and its parent hashes
59
59
60 This hash combines both the current file contents and its history
60 This hash combines both the current file contents and its history
61 in a manner that makes it easy to distinguish nodes with the same
61 in a manner that makes it easy to distinguish nodes with the same
62 content in the revision graph.
62 content in the revision graph.
63 """
63 """
64 # As of now, if one of the parent node is null, p2 is null
64 # As of now, if one of the parent node is null, p2 is null
65 if p2 == nullid:
65 if p2 == nullid:
66 # deep copy of a hash is faster than creating one
66 # deep copy of a hash is faster than creating one
67 s = nullhash.copy()
67 s = nullhash.copy()
68 s.update(p1)
68 s.update(p1)
69 else:
69 else:
70 # none of the parent nodes are nullid
70 # none of the parent nodes are nullid
71 l = [p1, p2]
71 l = [p1, p2]
72 l.sort()
72 l.sort()
73 s = _sha(l[0])
73 s = _sha(l[0])
74 s.update(l[1])
74 s.update(l[1])
75 s.update(text)
75 s.update(text)
76 return s.digest()
76 return s.digest()
77
77
78 def decompress(bin):
78 def decompress(bin):
79 """ decompress the given input """
79 """ decompress the given input """
80 if not bin:
80 if not bin:
81 return bin
81 return bin
82 t = bin[0]
82 t = bin[0]
83 if t == '\0':
83 if t == '\0':
84 return bin
84 return bin
85 if t == 'x':
85 if t == 'x':
86 try:
86 try:
87 return _decompress(bin)
87 return _decompress(bin)
88 except zlib.error, e:
88 except zlib.error, e:
89 raise RevlogError(_("revlog decompress error: %s") % str(e))
89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 if t == 'u':
90 if t == 'u':
91 return bin[1:]
91 return bin[1:]
92 raise RevlogError(_("unknown compression type %r") % t)
92 raise RevlogError(_("unknown compression type %r") % t)
93
93
94 # index v0:
94 # index v0:
95 # 4 bytes: offset
95 # 4 bytes: offset
96 # 4 bytes: compressed length
96 # 4 bytes: compressed length
97 # 4 bytes: base rev
97 # 4 bytes: base rev
98 # 4 bytes: link rev
98 # 4 bytes: link rev
99 # 32 bytes: parent 1 nodeid
99 # 32 bytes: parent 1 nodeid
100 # 32 bytes: parent 2 nodeid
100 # 32 bytes: parent 2 nodeid
101 # 32 bytes: nodeid
101 # 32 bytes: nodeid
102 indexformatv0 = ">4l20s20s20s"
102 indexformatv0 = ">4l20s20s20s"
103 v0shaoffset = 56
103 v0shaoffset = 56
104
104
105 class revlogoldio(object):
105 class revlogoldio(object):
106 def __init__(self):
106 def __init__(self):
107 self.size = struct.calcsize(indexformatv0)
107 self.size = struct.calcsize(indexformatv0)
108
108
109 def parseindex(self, data, inline):
109 def parseindex(self, data, inline):
110 s = self.size
110 s = self.size
111 index = []
111 index = []
112 nodemap = {nullid: nullrev}
112 nodemap = {nullid: nullrev}
113 n = off = 0
113 n = off = 0
114 l = len(data)
114 l = len(data)
115 while off + s <= l:
115 while off + s <= l:
116 cur = data[off:off + s]
116 cur = data[off:off + s]
117 off += s
117 off += s
118 e = _unpack(indexformatv0, cur)
118 e = _unpack(indexformatv0, cur)
119 # transform to revlogv1 format
119 # transform to revlogv1 format
120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 index.append(e2)
122 index.append(e2)
123 nodemap[e[6]] = n
123 nodemap[e[6]] = n
124 n += 1
124 n += 1
125
125
126 # add the magic null revision at -1
126 # add the magic null revision at -1
127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128
128
129 return index, nodemap, None
129 return index, nodemap, None
130
130
131 def packentry(self, entry, node, version, rev):
131 def packentry(self, entry, node, version, rev):
132 if gettype(entry[0]):
132 if gettype(entry[0]):
133 raise RevlogError(_("index entry flags need RevlogNG"))
133 raise RevlogError(_("index entry flags need RevlogNG"))
134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 node(entry[5]), node(entry[6]), entry[7])
135 node(entry[5]), node(entry[6]), entry[7])
136 return _pack(indexformatv0, *e2)
136 return _pack(indexformatv0, *e2)
137
137
138 # index ng:
138 # index ng:
139 # 6 bytes: offset
139 # 6 bytes: offset
140 # 2 bytes: flags
140 # 2 bytes: flags
141 # 4 bytes: compressed length
141 # 4 bytes: compressed length
142 # 4 bytes: uncompressed length
142 # 4 bytes: uncompressed length
143 # 4 bytes: base rev
143 # 4 bytes: base rev
144 # 4 bytes: link rev
144 # 4 bytes: link rev
145 # 4 bytes: parent 1 rev
145 # 4 bytes: parent 1 rev
146 # 4 bytes: parent 2 rev
146 # 4 bytes: parent 2 rev
147 # 32 bytes: nodeid
147 # 32 bytes: nodeid
148 indexformatng = ">Qiiiiii20s12x"
148 indexformatng = ">Qiiiiii20s12x"
149 ngshaoffset = 32
149 ngshaoffset = 32
150 versionformat = ">I"
150 versionformat = ">I"
151
151
152 class revlogio(object):
152 class revlogio(object):
153 def __init__(self):
153 def __init__(self):
154 self.size = struct.calcsize(indexformatng)
154 self.size = struct.calcsize(indexformatng)
155
155
156 def parseindex(self, data, inline):
156 def parseindex(self, data, inline):
157 # call the C implementation to parse the index data
157 # call the C implementation to parse the index data
158 index, cache = parsers.parse_index2(data, inline)
158 index, cache = parsers.parse_index2(data, inline)
159 return index, getattr(index, 'nodemap', None), cache
159 return index, getattr(index, 'nodemap', None), cache
160
160
161 def packentry(self, entry, node, version, rev):
161 def packentry(self, entry, node, version, rev):
162 p = _pack(indexformatng, *entry)
162 p = _pack(indexformatng, *entry)
163 if rev == 0:
163 if rev == 0:
164 p = _pack(versionformat, version) + p[4:]
164 p = _pack(versionformat, version) + p[4:]
165 return p
165 return p
166
166
167 class revlog(object):
167 class revlog(object):
168 """
168 """
169 the underlying revision storage object
169 the underlying revision storage object
170
170
171 A revlog consists of two parts, an index and the revision data.
171 A revlog consists of two parts, an index and the revision data.
172
172
173 The index is a file with a fixed record size containing
173 The index is a file with a fixed record size containing
174 information on each revision, including its nodeid (hash), the
174 information on each revision, including its nodeid (hash), the
175 nodeids of its parents, the position and offset of its data within
175 nodeids of its parents, the position and offset of its data within
176 the data file, and the revision it's based on. Finally, each entry
176 the data file, and the revision it's based on. Finally, each entry
177 contains a linkrev entry that can serve as a pointer to external
177 contains a linkrev entry that can serve as a pointer to external
178 data.
178 data.
179
179
180 The revision data itself is a linear collection of data chunks.
180 The revision data itself is a linear collection of data chunks.
181 Each chunk represents a revision and is usually represented as a
181 Each chunk represents a revision and is usually represented as a
182 delta against the previous chunk. To bound lookup time, runs of
182 delta against the previous chunk. To bound lookup time, runs of
183 deltas are limited to about 2 times the length of the original
183 deltas are limited to about 2 times the length of the original
184 version data. This makes retrieval of a version proportional to
184 version data. This makes retrieval of a version proportional to
185 its size, or O(1) relative to the number of revisions.
185 its size, or O(1) relative to the number of revisions.
186
186
187 Both pieces of the revlog are written to in an append-only
187 Both pieces of the revlog are written to in an append-only
188 fashion, which means we never need to rewrite a file to insert or
188 fashion, which means we never need to rewrite a file to insert or
189 remove data, and can use some simple techniques to avoid the need
189 remove data, and can use some simple techniques to avoid the need
190 for locking while reading.
190 for locking while reading.
191 """
191 """
192 def __init__(self, opener, indexfile):
192 def __init__(self, opener, indexfile):
193 """
193 """
194 create a revlog object
194 create a revlog object
195
195
196 opener is a function that abstracts the file opening operation
196 opener is a function that abstracts the file opening operation
197 and can be used to implement COW semantics or the like.
197 and can be used to implement COW semantics or the like.
198 """
198 """
199 self.indexfile = indexfile
199 self.indexfile = indexfile
200 self.datafile = indexfile[:-2] + ".d"
200 self.datafile = indexfile[:-2] + ".d"
201 self.opener = opener
201 self.opener = opener
202 self._cache = None
202 self._cache = None
203 self._basecache = None
203 self._basecache = None
204 self._chunkcache = (0, '')
204 self._chunkcache = (0, '')
205 self._chunkcachesize = 65536
205 self._chunkcachesize = 65536
206 self.index = []
206 self.index = []
207 self._pcache = {}
207 self._pcache = {}
208 self._nodecache = {nullid: nullrev}
208 self._nodecache = {nullid: nullrev}
209 self._nodepos = None
209 self._nodepos = None
210
210
211 v = REVLOG_DEFAULT_VERSION
211 v = REVLOG_DEFAULT_VERSION
212 opts = getattr(opener, 'options', None)
212 opts = getattr(opener, 'options', None)
213 if opts is not None:
213 if opts is not None:
214 if 'revlogv1' in opts:
214 if 'revlogv1' in opts:
215 if 'generaldelta' in opts:
215 if 'generaldelta' in opts:
216 v |= REVLOGGENERALDELTA
216 v |= REVLOGGENERALDELTA
217 else:
217 else:
218 v = 0
218 v = 0
219 if 'chunkcachesize' in opts:
219 if 'chunkcachesize' in opts:
220 self._chunkcachesize = opts['chunkcachesize']
220 self._chunkcachesize = opts['chunkcachesize']
221
221
222 if self._chunkcachesize <= 0:
222 if self._chunkcachesize <= 0:
223 raise RevlogError(_('revlog chunk cache size %r is not greater '
223 raise RevlogError(_('revlog chunk cache size %r is not greater '
224 'than 0') % self._chunkcachesize)
224 'than 0') % self._chunkcachesize)
225 elif self._chunkcachesize & (self._chunkcachesize - 1):
225 elif self._chunkcachesize & (self._chunkcachesize - 1):
226 raise RevlogError(_('revlog chunk cache size %r is not a power '
226 raise RevlogError(_('revlog chunk cache size %r is not a power '
227 'of 2') % self._chunkcachesize)
227 'of 2') % self._chunkcachesize)
228
228
229 i = ''
229 i = ''
230 self._initempty = True
230 self._initempty = True
231 try:
231 try:
232 f = self.opener(self.indexfile)
232 f = self.opener(self.indexfile)
233 i = f.read()
233 i = f.read()
234 f.close()
234 f.close()
235 if len(i) > 0:
235 if len(i) > 0:
236 v = struct.unpack(versionformat, i[:4])[0]
236 v = struct.unpack(versionformat, i[:4])[0]
237 self._initempty = False
237 self._initempty = False
238 except IOError, inst:
238 except IOError, inst:
239 if inst.errno != errno.ENOENT:
239 if inst.errno != errno.ENOENT:
240 raise
240 raise
241
241
242 self.version = v
242 self.version = v
243 self._inline = v & REVLOGNGINLINEDATA
243 self._inline = v & REVLOGNGINLINEDATA
244 self._generaldelta = v & REVLOGGENERALDELTA
244 self._generaldelta = v & REVLOGGENERALDELTA
245 flags = v & ~0xFFFF
245 flags = v & ~0xFFFF
246 fmt = v & 0xFFFF
246 fmt = v & 0xFFFF
247 if fmt == REVLOGV0 and flags:
247 if fmt == REVLOGV0 and flags:
248 raise RevlogError(_("index %s unknown flags %#04x for format v0")
248 raise RevlogError(_("index %s unknown flags %#04x for format v0")
249 % (self.indexfile, flags >> 16))
249 % (self.indexfile, flags >> 16))
250 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
250 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
251 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
251 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
252 % (self.indexfile, flags >> 16))
252 % (self.indexfile, flags >> 16))
253 elif fmt > REVLOGNG:
253 elif fmt > REVLOGNG:
254 raise RevlogError(_("index %s unknown format %d")
254 raise RevlogError(_("index %s unknown format %d")
255 % (self.indexfile, fmt))
255 % (self.indexfile, fmt))
256
256
257 self._io = revlogio()
257 self._io = revlogio()
258 if self.version == REVLOGV0:
258 if self.version == REVLOGV0:
259 self._io = revlogoldio()
259 self._io = revlogoldio()
260 try:
260 try:
261 d = self._io.parseindex(i, self._inline)
261 d = self._io.parseindex(i, self._inline)
262 except (ValueError, IndexError):
262 except (ValueError, IndexError):
263 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
263 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
264 self.index, nodemap, self._chunkcache = d
264 self.index, nodemap, self._chunkcache = d
265 if nodemap is not None:
265 if nodemap is not None:
266 self.nodemap = self._nodecache = nodemap
266 self.nodemap = self._nodecache = nodemap
267 if not self._chunkcache:
267 if not self._chunkcache:
268 self._chunkclear()
268 self._chunkclear()
269
269
270 def tip(self):
270 def tip(self):
271 return self.node(len(self.index) - 2)
271 return self.node(len(self.index) - 2)
272 def __len__(self):
272 def __len__(self):
273 return len(self.index) - 1
273 return len(self.index) - 1
274 def __iter__(self):
274 def __iter__(self):
275 return iter(xrange(len(self)))
275 return iter(xrange(len(self)))
276 def revs(self, start=0, stop=None):
276 def revs(self, start=0, stop=None):
277 """iterate over all rev in this revlog (from start to stop)"""
277 """iterate over all rev in this revlog (from start to stop)"""
278 step = 1
278 step = 1
279 if stop is not None:
279 if stop is not None:
280 if start > stop:
280 if start > stop:
281 step = -1
281 step = -1
282 stop += step
282 stop += step
283 else:
283 else:
284 stop = len(self)
284 stop = len(self)
285 return xrange(start, stop, step)
285 return xrange(start, stop, step)
286
286
287 @util.propertycache
287 @util.propertycache
288 def nodemap(self):
288 def nodemap(self):
289 self.rev(self.node(0))
289 self.rev(self.node(0))
290 return self._nodecache
290 return self._nodecache
291
291
292 def hasnode(self, node):
292 def hasnode(self, node):
293 try:
293 try:
294 self.rev(node)
294 self.rev(node)
295 return True
295 return True
296 except KeyError:
296 except KeyError:
297 return False
297 return False
298
298
299 def clearcaches(self):
299 def clearcaches(self):
300 try:
300 try:
301 self._nodecache.clearcaches()
301 self._nodecache.clearcaches()
302 except AttributeError:
302 except AttributeError:
303 self._nodecache = {nullid: nullrev}
303 self._nodecache = {nullid: nullrev}
304 self._nodepos = None
304 self._nodepos = None
305
305
306 def rev(self, node):
306 def rev(self, node):
307 try:
307 try:
308 return self._nodecache[node]
308 return self._nodecache[node]
309 except TypeError:
310 raise
309 except RevlogError:
311 except RevlogError:
310 # parsers.c radix tree lookup failed
312 # parsers.c radix tree lookup failed
311 raise LookupError(node, self.indexfile, _('no node'))
313 raise LookupError(node, self.indexfile, _('no node'))
312 except KeyError:
314 except KeyError:
313 # pure python cache lookup failed
315 # pure python cache lookup failed
314 n = self._nodecache
316 n = self._nodecache
315 i = self.index
317 i = self.index
316 p = self._nodepos
318 p = self._nodepos
317 if p is None:
319 if p is None:
318 p = len(i) - 2
320 p = len(i) - 2
319 for r in xrange(p, -1, -1):
321 for r in xrange(p, -1, -1):
320 v = i[r][7]
322 v = i[r][7]
321 n[v] = r
323 n[v] = r
322 if v == node:
324 if v == node:
323 self._nodepos = r - 1
325 self._nodepos = r - 1
324 return r
326 return r
325 raise LookupError(node, self.indexfile, _('no node'))
327 raise LookupError(node, self.indexfile, _('no node'))
326
328
327 def node(self, rev):
329 def node(self, rev):
328 return self.index[rev][7]
330 return self.index[rev][7]
329 def linkrev(self, rev):
331 def linkrev(self, rev):
330 return self.index[rev][4]
332 return self.index[rev][4]
331 def parents(self, node):
333 def parents(self, node):
332 i = self.index
334 i = self.index
333 d = i[self.rev(node)]
335 d = i[self.rev(node)]
334 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
336 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
335 def parentrevs(self, rev):
337 def parentrevs(self, rev):
336 return self.index[rev][5:7]
338 return self.index[rev][5:7]
337 def start(self, rev):
339 def start(self, rev):
338 return int(self.index[rev][0] >> 16)
340 return int(self.index[rev][0] >> 16)
339 def end(self, rev):
341 def end(self, rev):
340 return self.start(rev) + self.length(rev)
342 return self.start(rev) + self.length(rev)
341 def length(self, rev):
343 def length(self, rev):
342 return self.index[rev][1]
344 return self.index[rev][1]
343 def chainbase(self, rev):
345 def chainbase(self, rev):
344 index = self.index
346 index = self.index
345 base = index[rev][3]
347 base = index[rev][3]
346 while base != rev:
348 while base != rev:
347 rev = base
349 rev = base
348 base = index[rev][3]
350 base = index[rev][3]
349 return base
351 return base
350 def flags(self, rev):
352 def flags(self, rev):
351 return self.index[rev][0] & 0xFFFF
353 return self.index[rev][0] & 0xFFFF
352 def rawsize(self, rev):
354 def rawsize(self, rev):
353 """return the length of the uncompressed text for a given revision"""
355 """return the length of the uncompressed text for a given revision"""
354 l = self.index[rev][2]
356 l = self.index[rev][2]
355 if l >= 0:
357 if l >= 0:
356 return l
358 return l
357
359
358 t = self.revision(self.node(rev))
360 t = self.revision(self.node(rev))
359 return len(t)
361 return len(t)
360 size = rawsize
362 size = rawsize
361
363
362 def ancestors(self, revs, stoprev=0, inclusive=False):
364 def ancestors(self, revs, stoprev=0, inclusive=False):
363 """Generate the ancestors of 'revs' in reverse topological order.
365 """Generate the ancestors of 'revs' in reverse topological order.
364 Does not generate revs lower than stoprev.
366 Does not generate revs lower than stoprev.
365
367
366 See the documentation for ancestor.lazyancestors for more details."""
368 See the documentation for ancestor.lazyancestors for more details."""
367
369
368 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
370 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
369 inclusive=inclusive)
371 inclusive=inclusive)
370
372
371 def descendants(self, revs):
373 def descendants(self, revs):
372 """Generate the descendants of 'revs' in revision order.
374 """Generate the descendants of 'revs' in revision order.
373
375
374 Yield a sequence of revision numbers starting with a child of
376 Yield a sequence of revision numbers starting with a child of
375 some rev in revs, i.e., each revision is *not* considered a
377 some rev in revs, i.e., each revision is *not* considered a
376 descendant of itself. Results are ordered by revision number (a
378 descendant of itself. Results are ordered by revision number (a
377 topological sort)."""
379 topological sort)."""
378 first = min(revs)
380 first = min(revs)
379 if first == nullrev:
381 if first == nullrev:
380 for i in self:
382 for i in self:
381 yield i
383 yield i
382 return
384 return
383
385
384 seen = set(revs)
386 seen = set(revs)
385 for i in self.revs(start=first + 1):
387 for i in self.revs(start=first + 1):
386 for x in self.parentrevs(i):
388 for x in self.parentrevs(i):
387 if x != nullrev and x in seen:
389 if x != nullrev and x in seen:
388 seen.add(i)
390 seen.add(i)
389 yield i
391 yield i
390 break
392 break
391
393
392 def findcommonmissing(self, common=None, heads=None):
394 def findcommonmissing(self, common=None, heads=None):
393 """Return a tuple of the ancestors of common and the ancestors of heads
395 """Return a tuple of the ancestors of common and the ancestors of heads
394 that are not ancestors of common. In revset terminology, we return the
396 that are not ancestors of common. In revset terminology, we return the
395 tuple:
397 tuple:
396
398
397 ::common, (::heads) - (::common)
399 ::common, (::heads) - (::common)
398
400
399 The list is sorted by revision number, meaning it is
401 The list is sorted by revision number, meaning it is
400 topologically sorted.
402 topologically sorted.
401
403
402 'heads' and 'common' are both lists of node IDs. If heads is
404 'heads' and 'common' are both lists of node IDs. If heads is
403 not supplied, uses all of the revlog's heads. If common is not
405 not supplied, uses all of the revlog's heads. If common is not
404 supplied, uses nullid."""
406 supplied, uses nullid."""
405 if common is None:
407 if common is None:
406 common = [nullid]
408 common = [nullid]
407 if heads is None:
409 if heads is None:
408 heads = self.heads()
410 heads = self.heads()
409
411
410 common = [self.rev(n) for n in common]
412 common = [self.rev(n) for n in common]
411 heads = [self.rev(n) for n in heads]
413 heads = [self.rev(n) for n in heads]
412
414
413 # we want the ancestors, but inclusive
415 # we want the ancestors, but inclusive
414 class lazyset(object):
416 class lazyset(object):
415 def __init__(self, lazyvalues):
417 def __init__(self, lazyvalues):
416 self.addedvalues = set()
418 self.addedvalues = set()
417 self.lazyvalues = lazyvalues
419 self.lazyvalues = lazyvalues
418
420
419 def __contains__(self, value):
421 def __contains__(self, value):
420 return value in self.addedvalues or value in self.lazyvalues
422 return value in self.addedvalues or value in self.lazyvalues
421
423
422 def __iter__(self):
424 def __iter__(self):
423 added = self.addedvalues
425 added = self.addedvalues
424 for r in added:
426 for r in added:
425 yield r
427 yield r
426 for r in self.lazyvalues:
428 for r in self.lazyvalues:
427 if not r in added:
429 if not r in added:
428 yield r
430 yield r
429
431
430 def add(self, value):
432 def add(self, value):
431 self.addedvalues.add(value)
433 self.addedvalues.add(value)
432
434
433 def update(self, values):
435 def update(self, values):
434 self.addedvalues.update(values)
436 self.addedvalues.update(values)
435
437
436 has = lazyset(self.ancestors(common))
438 has = lazyset(self.ancestors(common))
437 has.add(nullrev)
439 has.add(nullrev)
438 has.update(common)
440 has.update(common)
439
441
440 # take all ancestors from heads that aren't in has
442 # take all ancestors from heads that aren't in has
441 missing = set()
443 missing = set()
442 visit = util.deque(r for r in heads if r not in has)
444 visit = util.deque(r for r in heads if r not in has)
443 while visit:
445 while visit:
444 r = visit.popleft()
446 r = visit.popleft()
445 if r in missing:
447 if r in missing:
446 continue
448 continue
447 else:
449 else:
448 missing.add(r)
450 missing.add(r)
449 for p in self.parentrevs(r):
451 for p in self.parentrevs(r):
450 if p not in has:
452 if p not in has:
451 visit.append(p)
453 visit.append(p)
452 missing = list(missing)
454 missing = list(missing)
453 missing.sort()
455 missing.sort()
454 return has, [self.node(r) for r in missing]
456 return has, [self.node(r) for r in missing]
455
457
456 def findmissingrevs(self, common=None, heads=None):
458 def findmissingrevs(self, common=None, heads=None):
457 """Return the revision numbers of the ancestors of heads that
459 """Return the revision numbers of the ancestors of heads that
458 are not ancestors of common.
460 are not ancestors of common.
459
461
460 More specifically, return a list of revision numbers corresponding to
462 More specifically, return a list of revision numbers corresponding to
461 nodes N such that every N satisfies the following constraints:
463 nodes N such that every N satisfies the following constraints:
462
464
463 1. N is an ancestor of some node in 'heads'
465 1. N is an ancestor of some node in 'heads'
464 2. N is not an ancestor of any node in 'common'
466 2. N is not an ancestor of any node in 'common'
465
467
466 The list is sorted by revision number, meaning it is
468 The list is sorted by revision number, meaning it is
467 topologically sorted.
469 topologically sorted.
468
470
469 'heads' and 'common' are both lists of revision numbers. If heads is
471 'heads' and 'common' are both lists of revision numbers. If heads is
470 not supplied, uses all of the revlog's heads. If common is not
472 not supplied, uses all of the revlog's heads. If common is not
471 supplied, uses nullid."""
473 supplied, uses nullid."""
472 if common is None:
474 if common is None:
473 common = [nullrev]
475 common = [nullrev]
474 if heads is None:
476 if heads is None:
475 heads = self.headrevs()
477 heads = self.headrevs()
476
478
477 return ancestor.missingancestors(heads, common, self.parentrevs)
479 return ancestor.missingancestors(heads, common, self.parentrevs)
478
480
479 def findmissing(self, common=None, heads=None):
481 def findmissing(self, common=None, heads=None):
480 """Return the ancestors of heads that are not ancestors of common.
482 """Return the ancestors of heads that are not ancestors of common.
481
483
482 More specifically, return a list of nodes N such that every N
484 More specifically, return a list of nodes N such that every N
483 satisfies the following constraints:
485 satisfies the following constraints:
484
486
485 1. N is an ancestor of some node in 'heads'
487 1. N is an ancestor of some node in 'heads'
486 2. N is not an ancestor of any node in 'common'
488 2. N is not an ancestor of any node in 'common'
487
489
488 The list is sorted by revision number, meaning it is
490 The list is sorted by revision number, meaning it is
489 topologically sorted.
491 topologically sorted.
490
492
491 'heads' and 'common' are both lists of node IDs. If heads is
493 'heads' and 'common' are both lists of node IDs. If heads is
492 not supplied, uses all of the revlog's heads. If common is not
494 not supplied, uses all of the revlog's heads. If common is not
493 supplied, uses nullid."""
495 supplied, uses nullid."""
494 if common is None:
496 if common is None:
495 common = [nullid]
497 common = [nullid]
496 if heads is None:
498 if heads is None:
497 heads = self.heads()
499 heads = self.heads()
498
500
499 common = [self.rev(n) for n in common]
501 common = [self.rev(n) for n in common]
500 heads = [self.rev(n) for n in heads]
502 heads = [self.rev(n) for n in heads]
501
503
502 return [self.node(r) for r in
504 return [self.node(r) for r in
503 ancestor.missingancestors(heads, common, self.parentrevs)]
505 ancestor.missingancestors(heads, common, self.parentrevs)]
504
506
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [n for n in roots if n in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(n) for n in roots])
            else:
                # No more roots? Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
                else:
                    p = tuple(self.parents(n))
                    # A node is a descendant if either of its parents are
                    # descendants. (We seeded the descendants set with the
                    # roots up there, remember?)
                    if (p[0] in descendants) or (p[1] in descendants):
                        descendants.add(n)
                        isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

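    # Illustrative sketch (not part of the original source): for a linear
    # history 0 -> 1 -> 2 -> 3, with hypothetical names n0..n3 for the
    # corresponding node IDs, slicing out the middle of the chain gives:
    #
    #   nodes, outroots, outheads = rl.nodesbetween([n1], [n3])
    #   # nodes    == [n1, n2, n3]  (both endpoints included)
    #   # outroots == [n1]          (the reachable subset of 'roots')
    #   # outheads == [n3]          (the reachable subset of 'heads')
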
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

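    # Illustrative trace (not part of the original source): for a 3-rev log
    # where revs 1 and 2 are both children of rev 0, every rev first marks
    # itself, then clears its parents' bits; the extra list slot swallows the
    # nullrev (-1) parent writes:
    #
    #   ishead starts as [0, 0, 0, 0]
    #   r=0: mark 0; parents (-1, -1) clear the last slot
    #   r=1: mark 1; parents (0, -1) clear slot 0
    #   r=2: mark 2; parents (0, -1) clear slot 0 again
    #   -> [0, 1, 1, 0], so the head revs are [1, 2]
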
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

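    # Illustrative sketch (not part of the original source): with 'base' a
    # hypothetical node in the log, the forward scan above keeps a rev in
    # 'heads' only until a reachable child displaces it:
    #
    #   rl.heads()           # all childless nodes, via headrevs()
    #   rl.heads(start=base) # childless nodes reachable from 'base'
    #   rl.heads(stop=[n])   # revs in 'stop' never enter the reachable set,
    #                        # so nothing is counted as reached through them
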
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

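    # Illustrative sketch (not part of the original source): if 'a' and 'b'
    # sit on two branches forked from a common base, the candidate set can
    # legitimately contain several heads (criss-cross merges); taking min()
    # over the binary node values is an arbitrary but stable tie-break:
    #
    #   cands = rl.commonancestorsheads(a, b)
    #   lca = rl.ancestor(a, b)  # min() of the candidates, nullid if unrelated
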
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

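    # Illustrative sketch (not part of the original source): lookup() accepts
    # several spellings of the same revision. For a hypothetical node 'n'
    # stored at rev 5, each of these should return the 20-byte node 'n':
    #
    #   rl.lookup(5)            # rev number (int)    -> _match
    #   rl.lookup('5')          # str(rev)            -> _match
    #   rl.lookup(hex(n))       # 40-char full hex    -> _match
    #   rl.lookup(hex(n)[:12])  # unambiguous prefix  -> _partialmatch
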
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

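    # Worked example for the window arithmetic above (illustrative, assuming
    # a _chunkcachesize of 65536): a request for offset=70000, length=1000
    # rounds down to realoffset = 70000 & ~65535 = 65536 and up to
    # reallength = ((70000 + 1000 + 65536) & ~65535) - 65536 = 65536, so a
    # single aligned 64k read covers the request plus slack on both sides.
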
    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length)

    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunks(self, revs):
        '''faster version of [self._chunk(rev) for rev in revs]

        Assumes that revs is in ascending order.'''
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # preload the cache
        try:
            while True:
                # ensure that the cache doesn't change out from under us
                _cache = self._chunkcache
                self._chunkraw(revs[0], revs[-1])
                if _cache == self._chunkcache:
                    break
            offset, data = _cache
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

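    # Illustrative sketch (not part of the original source): given an
    # ascending delta chain such as [2, 5, 9], _chunks() issues one raw read
    # spanning revs 2..9 and then slices each rev's compressed chunk out of
    # the cached buffer, instead of performing three separate reads:
    #
    #   bins = rl._chunks([2, 5, 9])
    #   assert len(bins) == 3  # one decompressed chunk per requested rev
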
    def _chunkclear(self):
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        _cache = self._cache # grab local copy of cache to avoid thread race
        cachedrev = None
        if node == nullid:
            return ""
        if _cache:
            if _cache[0] == node:
                return _cache[2]
            cachedrev = _cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == cachedrev:
            # cache hit
            text = _cache[2]
        else:
            chain.append(iterrev)
        chain.reverse()

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

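    # Illustrative walk-through (not part of the original source): with
    # generaldelta off, a request for rev 7 whose chain base is rev 4 builds
    # chain == [4, 5, 6, 7] after the reverse(); bins[0] is then the full
    # text stored at rev 4 and mdiff.patches() folds the three following
    # deltas into it. If the cached rev lies inside the chain, the walk
    # short-circuits there and patching starts from the cached text instead.
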
    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)
        node = node or hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """generate a possibly-compressed representation of text"""
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

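    # Illustrative note (not part of the original source): the first element
    # of the returned tuple is a header byte for the chunk; on the read side,
    # decompress() is expected to dispatch on the first byte of the stored
    # data:
    #
    #   ("", zlibdata)  # zlib output; its leading 'x' byte flags compression
    #   ('u', text)     # incompressible text stored raw behind a 'u' marker
    #   ("", text)      # raw text already starting with '\0' needs no marker
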
    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            self.checkhash(btext[0], p1, p2, node)
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            return dist, l, data, base, chainbase

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        offset = self.end(prev)
        flags = 0
        d = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)
        if d is None or dist > textlen * 2:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the
        rest are against the previous delta.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

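    # Illustrative sketch (not part of the original source): take two branches
    # off rev 0, stored interleaved so linkrevs are not monotonic in rev
    # number: rev 0 (linkrev 0), rev 1 (linkrev 1), rev 2 (linkrev 4, child
    # of 0), rev 3 (linkrev 2, child of 1). Then getstrippoint(3) returns
    # (2, set([3])): truncating at rev 2 removes the one linkrev >= 3, and
    # rev 3, caught in the truncation despite its small linkrev, is reported
    # as broken so the caller can save and re-add it.
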
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

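    # Illustrative note (not part of the original source): a clean revlog
    # should yield rl.checksize() == (0, 0). A nonzero 'dd' means the data
    # file length disagrees with what the last indexed revision implies, and
    # a nonzero 'di' means the index file length is off; integrity checks in
    # the style of 'hg verify' can surface these as "data length off" /
    # "extra bytes" problems.
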
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res