repoview: separate cache hash computation from cache reading...
Stanislau Hlebik
r31049:20027be9 default
@@ -1,360 +1,363 @@
1 1 # repoview.py - Filtered view of a localrepo object
2 2 #
3 3 # Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
4 4 # Logilab SA <contact@logilab.fr>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import copy
12 12 import hashlib
13 13 import heapq
14 14 import struct
15 15
16 16 from .node import nullrev
17 17 from . import (
18 18 error,
19 19 obsolete,
20 20 phases,
21 21 tags as tagsmod,
22 22 )
23 23
24 24 def hideablerevs(repo):
25 25 """Revision candidates to be hidden
26 26
27 27 This is a standalone function to allow extensions to wrap it.
28 28
29 29 Because we use the set of immutable changesets as a fallback subset in
30 30 branchmap (see mercurial.branchmap.subsettable), you cannot set "public"
31 31 changesets as "hideable". Doing so would break multiple code assertions and
32 32 lead to crashes."""
33 33 return obsolete.getrevs(repo, 'obsolete')
34 34
35 35 def _getstatichidden(repo):
36 36 """Revision to be hidden (disregarding dynamic blocker)
37 37
38 38 To keep a consistent graph, we cannot hide any revisions with
39 39 non-hidden descendants. This function computes the set of
40 40 revisions that could be hidden while keeping the graph consistent.
41 41
42 42 A second pass will be done to apply "dynamic blockers" such as bookmarks or
43 43 working directory parents.
44 44
45 45 """
46 46 assert not repo.changelog.filteredrevs
47 47 hidden = set(hideablerevs(repo))
48 48 if hidden:
49 49 getphase = repo._phasecache.phase
50 50 getparentrevs = repo.changelog.parentrevs
51 51 # Skip heads which are public (guaranteed to not be hidden)
52 52 heap = [-r for r in repo.changelog.headrevs() if getphase(repo, r)]
53 53 heapq.heapify(heap)
54 54 heappop = heapq.heappop
55 55 heappush = heapq.heappush
56 56 seen = set() # no need to init it with heads, they have no children
57 57 while heap:
58 58 rev = -heappop(heap)
59 59 # All children have been processed so at that point, if no children
60 60 # removed 'rev' from the 'hidden' set, 'rev' is going to be hidden.
61 61 blocker = rev not in hidden
62 62 for parent in getparentrevs(rev):
63 63 if parent == nullrev:
64 64 continue
65 65 if blocker:
66 66 # If visible, ensure parent will be visible too
67 67 hidden.discard(parent)
68 68 # - Avoid adding the same revision twice
69 69 # - Skip nodes which are public (guaranteed to not be hidden)
70 70 pre = len(seen)
71 71 seen.add(parent)
72 72 if pre < len(seen) and getphase(repo, rev):
73 73 heappush(heap, -parent)
74 74 return hidden
75 75
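# Minimal sketch of the consistency rule above (illustrative, not part of this
# file): assume a linear history 0 -> 1 -> 2 where only revision 1 is
# hideable. Because its descendant 2 stays visible, 1 must stay visible too;
# only hideable revisions whose descendants are all hideable remain hidden.
parents = {1: [0], 2: [1]}                  # child -> parents, simplified stand-in
hideable = {1}
hidden = set(hideable)
for rev in sorted(parents, reverse=True):   # visit children before parents
    if rev not in hidden:                   # rev stays visible...
        for parent in parents[rev]:
            hidden.discard(parent)          # ...so its parents must stay too
assert hidden == set()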
76 76 def _getdynamicblockers(repo):
77 77 """Non-cacheable revisions blocking hidden changesets from being filtered.
78 78
79 79 Get revisions that will block hidden changesets and are likely to change,
80 80 but unlikely to create hidden blockers. They won't be cached, so be careful
81 81 with adding additional computation."""
82 82
83 83 cl = repo.changelog
84 84 blockers = set()
85 85 blockers.update([par.rev() for par in repo[None].parents()])
86 86 blockers.update([cl.rev(bm) for bm in repo._bookmarks.values()])
87 87
88 88 tags = {}
89 89 tagsmod.readlocaltags(repo.ui, repo, tags, {})
90 90 if tags:
91 91 rev, nodemap = cl.rev, cl.nodemap
92 92 blockers.update(rev(t[0]) for t in tags.values() if t[0] in nodemap)
93 93 return blockers
94 94
95 95 cacheversion = 1
96 96 cachefile = 'cache/hidden'
97 97
98 98 def cachehash(repo, hideable):
99 99 """return sha1 hash of repository data to identify a valid cache.
100 100
101 101 We calculate a sha1 of repo heads and the content of the obsstore and write
102 102 it to the cache. Upon reading we can easily validate by checking the hash
103 103 against the stored one and discard the cache in case the hashes don't match.
104 104 """
105 105 h = hashlib.sha1()
106 106 h.update(''.join(repo.heads()))
107 107 h.update(str(hash(frozenset(hideable))))
108 108 return h.digest()
109 109
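# Conceptual sketch of cachehash() with stand-in data (illustrative, not part
# of this changeset): the validation key is a sha1 over the concatenated
# binary heads plus the hash of the hideable revision set.
import hashlib
fakeheads = [b'\x11' * 20, b'\x22' * 20]     # stand-ins for 20-byte head nodes
fakehideable = frozenset([3, 5, 8])
h = hashlib.sha1()
h.update(b''.join(fakeheads))
h.update(str(hash(fakehideable)).encode('ascii'))
key = h.digest()                             # 20 bytes, later stored in the cache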
110 110 def _writehiddencache(cachefile, cachehash, hidden):
111 111 """write hidden data to a cache file"""
112 112 data = struct.pack('>%ii' % len(hidden), *sorted(hidden))
113 113 cachefile.write(struct.pack(">H", cacheversion))
114 114 cachefile.write(cachehash)
115 115 cachefile.write(data)
116 116
117 117 def trywritehiddencache(repo, hideable, hidden):
118 118 """write cache of hidden changesets to disk
119 119
120 120 Will not write the cache if a wlock cannot be obtained lazily.
121 121 The cache consists of a header of 22 bytes:
122 122 2 byte version number of the cache
123 123 20 byte sha1 to validate the cache
124 124 n*4 byte hidden revs
125 125 """
126 126 wlock = fh = None
127 127 try:
128 128 wlock = repo.wlock(wait=False)
129 129 # write cache to file
130 130 newhash = cachehash(repo, hideable)
131 131 fh = repo.vfs.open(cachefile, 'w+b', atomictemp=True)
132 132 _writehiddencache(fh, newhash, hidden)
133 133 fh.close()
134 134 except (IOError, OSError):
135 135 repo.ui.debug('error writing hidden changesets cache\n')
136 136 except error.LockHeld:
137 137 repo.ui.debug('cannot obtain lock to write hidden changesets cache\n')
138 138 finally:
139 139 if wlock:
140 140 wlock.release()
141 141
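# Illustrative round-trip of the on-disk layout described in the docstring of
# trywritehiddencache() above (not part of this changeset): 2-byte version,
# 20-byte sha1, then one big-endian 4-byte integer per hidden revision.
import struct
fakehash = b'\x00' * 20                      # stand-in for the 20-byte sha1
hiddenrevs = [3, 5, 8]
blob = struct.pack('>H', 1) + fakehash       # cacheversion == 1
blob += struct.pack('>%ii' % len(hiddenrevs), *hiddenrevs)

version, = struct.unpack('>H', blob[:2])
storedhash = blob[2:22]
count = len(blob[22:]) // 4
revs = struct.unpack('>%ii' % count, blob[22:])
assert (version, storedhash, list(revs)) == (1, fakehash, hiddenrevs)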
142 def tryreadcache(repo, hideable):
143 """read a cache if the cache exists and is valid, otherwise returns None."""
142 def _readhiddencache(repo, cachefilename, newhash):
144 143 hidden = fh = None
145 144 try:
146 145 if repo.vfs.exists(cachefile):
147 146 fh = repo.vfs.open(cachefile, 'rb')
148 147 version, = struct.unpack(">H", fh.read(2))
149 148 oldhash = fh.read(20)
150 newhash = cachehash(repo, hideable)
151 149 if (cacheversion, oldhash) == (version, newhash):
152 150 # cache is valid, so we can start reading the hidden revs
153 151 data = fh.read()
154 152 count = len(data) / 4
155 153 hidden = frozenset(struct.unpack('>%ii' % count, data))
156 154 return hidden
157 155 except struct.error:
158 156 repo.ui.debug('corrupted hidden cache\n')
159 157 # No need to fix the content as it will get rewritten
160 158 return None
161 159 except (IOError, OSError):
162 160 repo.ui.debug('cannot read hidden cache\n')
163 161 return None
164 162 finally:
165 163 if fh:
166 164 fh.close()
167 165
166 def tryreadcache(repo, hideable):
167 """read a cache if the cache exists and is valid, otherwise returns None."""
168 newhash = cachehash(repo, hideable)
169 return _readhiddencache(repo, cachefile, newhash)
170
168 171 def computehidden(repo):
169 172 """compute the set of hidden revision to filter
170 173
171 174 During most operation hidden should be filtered."""
172 175 assert not repo.changelog.filteredrevs
173 176
174 177 hidden = frozenset()
175 178 hideable = hideablerevs(repo)
176 179 if hideable:
177 180 cl = repo.changelog
178 181 hidden = tryreadcache(repo, hideable)
179 182 if hidden is None:
180 183 hidden = frozenset(_getstatichidden(repo))
181 184 trywritehiddencache(repo, hideable, hidden)
182 185
183 186 # check if we have wd parents, bookmarks or tags pointing to hidden
184 187 # changesets and remove those.
185 188 dynamic = hidden & _getdynamicblockers(repo)
186 189 if dynamic:
187 190 blocked = cl.ancestors(dynamic, inclusive=True)
188 191 hidden = frozenset(r for r in hidden if r not in blocked)
189 192 return hidden
190 193
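# Tiny illustration of the dynamic-blocker step in computehidden() above (not
# part of this changeset): hidden revisions that are ancestors of a blocker
# (working directory parent, bookmark, local tag) are made visible again.
fakehidden = frozenset([4, 5, 6])
fakeblocked = frozenset([2, 3, 5])           # ancestors of dynamic blockers
assert frozenset(r for r in fakehidden
                 if r not in fakeblocked) == frozenset([4, 6])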
191 194 def computeunserved(repo):
192 195 """compute the set of revision that should be filtered when used a server
193 196
194 197 Secret and hidden changeset should not pretend to be here."""
195 198 assert not repo.changelog.filteredrevs
196 199 # fast path in simple case to avoid impact of non optimised code
197 200 hiddens = filterrevs(repo, 'visible')
198 201 if phases.hassecret(repo):
199 202 cl = repo.changelog
200 203 secret = phases.secret
201 204 getphase = repo._phasecache.phase
202 205 first = min(cl.rev(n) for n in repo._phasecache.phaseroots[secret])
203 206 revs = cl.revs(start=first)
204 207 secrets = set(r for r in revs if getphase(repo, r) >= secret)
205 208 return frozenset(hiddens | secrets)
206 209 else:
207 210 return hiddens
208 211
209 212 def computemutable(repo):
210 213 """compute the set of revision that should be filtered when used a server
211 214
212 215 Secret and hidden changeset should not pretend to be here."""
213 216 assert not repo.changelog.filteredrevs
214 217 # fast check to avoid revset call on huge repo
215 218 if any(repo._phasecache.phaseroots[1:]):
216 219 getphase = repo._phasecache.phase
217 220 maymutable = filterrevs(repo, 'base')
218 221 return frozenset(r for r in maymutable if getphase(repo, r))
219 222 return frozenset()
220 223
221 224 def computeimpactable(repo):
222 225 """Everything impactable by mutable revision
223 226
224 227 The immutable filter still have some chance to get invalidated. This will
225 228 happen when:
226 229
227 230 - you garbage collect hidden changeset,
228 231 - public phase is moved backward,
229 232 - something is changed in the filtering (this could be fixed)
230 233
231 234 This filter out any mutable changeset and any public changeset that may be
232 235 impacted by something happening to a mutable revision.
233 236
234 237 This is achieved by filtered everything with a revision number egal or
235 238 higher than the first mutable changeset is filtered."""
236 239 assert not repo.changelog.filteredrevs
237 240 cl = repo.changelog
238 241 firstmutable = len(cl)
239 242 for roots in repo._phasecache.phaseroots[1:]:
240 243 if roots:
241 244 firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
242 245 # protect from nullrev root
243 246 firstmutable = max(0, firstmutable)
244 247 return frozenset(xrange(firstmutable, len(cl)))
245 248
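# Worked example for computeimpactable() (illustrative, not part of this
# changeset): with 10 revisions and the first mutable changeset at revision 5,
# the 'base' filter hides the whole contiguous range 5..9.
firstmutable, repolen = 5, 10
assert frozenset(range(firstmutable, repolen)) == frozenset([5, 6, 7, 8, 9])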
246 249 # function to compute filtered set
247 250 #
248 251 # When adding a new filter you MUST update the table at:
249 252 # mercurial.branchmap.subsettable
250 253 # Otherwise your filter will have to recompute all its branches cache
251 254 # from scratch (very slow).
252 255 filtertable = {'visible': computehidden,
253 256 'served': computeunserved,
254 257 'immutable': computemutable,
255 258 'base': computeimpactable}
256 259
257 260 def filterrevs(repo, filtername):
258 261 """returns set of filtered revision for this filter name"""
259 262 if filtername not in repo.filteredrevcache:
260 263 func = filtertable[filtername]
261 264 repo.filteredrevcache[filtername] = func(repo.unfiltered())
262 265 return repo.filteredrevcache[filtername]
263 266
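# Hypothetical usage of filterrevs() (not part of this changeset; it assumes
# an existing unfiltered localrepo object named `repo`): the filtered set is
# computed once per filter name and memoized on the repository.
#
#   hidden = filterrevs(repo, 'visible')        # computed via computehidden
#   hidden is filterrevs(repo, 'visible')       # True: repo.filteredrevcache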
264 267 class repoview(object):
265 268 """Provide a read/write view of a repo through a filtered changelog
266 269
267 270 This object is used to access a filtered version of a repository without
268 271 altering the original repository object itself. We can not alter the
269 272 original object for two main reasons:
270 273 - It would prevent the use of a repo with multiple filters at the same time,
271 274 in particular when multiple threads are involved.
272 275 - It would make the scope of the filtering harder to control.
273 276
274 277 This object behaves very much like the original repository. All attribute
275 278 operations are done on the original repository:
276 279 - An access to `repoview.someattr` actually returns `repo.someattr`,
277 280 - A write to `repoview.someattr` actually sets value of `repo.someattr`,
278 281 - A deletion of `repoview.someattr` actually drops `someattr`
279 282 from `repo.__dict__`.
280 283
281 284 The only exception is the `changelog` property. It is overridden to return
282 285 a (surface) copy of `repo.changelog` with some revisions filtered. The
283 286 `filtername` attribute of the view controls the revisions that need to be
284 287 filtered. (The fact that the changelog is copied is an implementation detail.)
285 288
286 289 Unlike attributes, this object intercepts all method calls. This means that
287 290 all methods are run on the `repoview` object with the filtered `changelog`
288 291 property. For this purpose the simple `repoview` class must be mixed with
289 292 the actual class of the repository. This ensures that the resulting
290 293 `repoview` object has the very same methods as the repo object. This
291 294 leads to the property below.
292 295
293 296 repoview.method() --> repo.__class__.method(repoview)
294 297
295 298 The inheritance has to be done dynamically because `repo` can be any
296 299 subclass of `localrepo`, e.g. `bundlerepo` or `statichttprepo`.
297 300 """
298 301
299 302 def __init__(self, repo, filtername):
300 303 object.__setattr__(self, '_unfilteredrepo', repo)
301 304 object.__setattr__(self, 'filtername', filtername)
302 305 object.__setattr__(self, '_clcachekey', None)
303 306 object.__setattr__(self, '_clcache', None)
304 307
305 308 # not a propertycache on purpose; we shall implement a proper cache later
306 309 @property
307 310 def changelog(self):
308 311 """return a filtered version of the changeset
309 312
310 313 this changelog must not be used for writing"""
311 314 # some cache may be implemented later
312 315 unfi = self._unfilteredrepo
313 316 unfichangelog = unfi.changelog
314 317 # bypass call to changelog.method
315 318 unfiindex = unfichangelog.index
316 319 unfilen = len(unfiindex) - 1
317 320 unfinode = unfiindex[unfilen - 1][7]
318 321
319 322 revs = filterrevs(unfi, self.filtername)
320 323 cl = self._clcache
321 324 newkey = (unfilen, unfinode, hash(revs), unfichangelog._delayed)
322 325 # if cl.index is not unfiindex, unfi.changelog would be
323 326 # recreated, and our clcache refers to garbage object
324 327 if (cl is not None and
325 328 (cl.index is not unfiindex or newkey != self._clcachekey)):
326 329 cl = None
327 330 # could have been made None by the previous if
328 331 if cl is None:
329 332 cl = copy.copy(unfichangelog)
330 333 cl.filteredrevs = revs
331 334 object.__setattr__(self, '_clcache', cl)
332 335 object.__setattr__(self, '_clcachekey', newkey)
333 336 return cl
334 337
335 338 def unfiltered(self):
336 339 """Return an unfiltered version of a repo"""
337 340 return self._unfilteredrepo
338 341
339 342 def filtered(self, name):
340 343 """Return a filtered version of a repository"""
341 344 if name == self.filtername:
342 345 return self
343 346 return self.unfiltered().filtered(name)
344 347
345 348 # every attribute access is forwarded to the proxied repo
346 349 def __getattr__(self, attr):
347 350 return getattr(self._unfilteredrepo, attr)
348 351
349 352 def __setattr__(self, attr, value):
350 353 return setattr(self._unfilteredrepo, attr, value)
351 354
352 355 def __delattr__(self, attr):
353 356 return delattr(self._unfilteredrepo, attr)
354 357
355 358 # The `requirements` attribute is initialized during __init__. But
356 359 # __getattr__ won't be called as it also exists on the class. We need
357 360 # explicit forwarding to the main repo here
358 361 @property
359 362 def requirements(self):
360 363 return self._unfilteredrepo.requirements
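# Illustrative sketch of the dynamic mixin described in the repoview docstring
# (simplified, not Mercurial's actual code): combining the proxy class with
# the concrete repository class at runtime gives repoview.method() the
# semantics repo.__class__.method(repoview).
class fakerepo(object):
    def summary(self):
        return 'filtered by %s' % self.filtername

class fakeview(object):
    def __init__(self, repo, filtername):
        object.__setattr__(self, '_unfilteredrepo', repo)
        object.__setattr__(self, 'filtername', filtername)
    def __getattr__(self, attr):
        return getattr(self._unfilteredrepo, attr)

proxycls = type('fakerepoview', (fakeview, fakerepo), {})
view = proxycls(fakerepo(), 'visible')
assert view.summary() == 'filtered by visible'   # method runs against the view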