##// END OF EJS Templates
revbranchcache: move cache writing to the transaction finalizer...
Durham Goode -
r24377:656f93ce default
parent child Browse files
Show More
@@ -1,459 +1,463 b''
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from node import bin, hex, nullid, nullrev
8 from node import bin, hex, nullid, nullrev
9 import encoding
9 import encoding
10 import util
10 import util
11 import time
11 import time
12 from array import array
12 from array import array
13 from struct import calcsize, pack, unpack
13 from struct import calcsize, pack, unpack
14
14
15 def _filename(repo):
15 def _filename(repo):
16 """name of a branchcache file for a given repo or repoview"""
16 """name of a branchcache file for a given repo or repoview"""
17 filename = "cache/branch2"
17 filename = "cache/branch2"
18 if repo.filtername:
18 if repo.filtername:
19 filename = '%s-%s' % (filename, repo.filtername)
19 filename = '%s-%s' % (filename, repo.filtername)
20 return filename
20 return filename
21
21
22 def read(repo):
22 def read(repo):
23 try:
23 try:
24 f = repo.vfs(_filename(repo))
24 f = repo.vfs(_filename(repo))
25 lines = f.read().split('\n')
25 lines = f.read().split('\n')
26 f.close()
26 f.close()
27 except (IOError, OSError):
27 except (IOError, OSError):
28 return None
28 return None
29
29
30 try:
30 try:
31 cachekey = lines.pop(0).split(" ", 2)
31 cachekey = lines.pop(0).split(" ", 2)
32 last, lrev = cachekey[:2]
32 last, lrev = cachekey[:2]
33 last, lrev = bin(last), int(lrev)
33 last, lrev = bin(last), int(lrev)
34 filteredhash = None
34 filteredhash = None
35 if len(cachekey) > 2:
35 if len(cachekey) > 2:
36 filteredhash = bin(cachekey[2])
36 filteredhash = bin(cachekey[2])
37 partial = branchcache(tipnode=last, tiprev=lrev,
37 partial = branchcache(tipnode=last, tiprev=lrev,
38 filteredhash=filteredhash)
38 filteredhash=filteredhash)
39 if not partial.validfor(repo):
39 if not partial.validfor(repo):
40 # invalidate the cache
40 # invalidate the cache
41 raise ValueError('tip differs')
41 raise ValueError('tip differs')
42 for l in lines:
42 for l in lines:
43 if not l:
43 if not l:
44 continue
44 continue
45 node, state, label = l.split(" ", 2)
45 node, state, label = l.split(" ", 2)
46 if state not in 'oc':
46 if state not in 'oc':
47 raise ValueError('invalid branch state')
47 raise ValueError('invalid branch state')
48 label = encoding.tolocal(label.strip())
48 label = encoding.tolocal(label.strip())
49 if not node in repo:
49 if not node in repo:
50 raise ValueError('node %s does not exist' % node)
50 raise ValueError('node %s does not exist' % node)
51 node = bin(node)
51 node = bin(node)
52 partial.setdefault(label, []).append(node)
52 partial.setdefault(label, []).append(node)
53 if state == 'c':
53 if state == 'c':
54 partial._closednodes.add(node)
54 partial._closednodes.add(node)
55 except KeyboardInterrupt:
55 except KeyboardInterrupt:
56 raise
56 raise
57 except Exception, inst:
57 except Exception, inst:
58 if repo.ui.debugflag:
58 if repo.ui.debugflag:
59 msg = 'invalid branchheads cache'
59 msg = 'invalid branchheads cache'
60 if repo.filtername is not None:
60 if repo.filtername is not None:
61 msg += ' (%s)' % repo.filtername
61 msg += ' (%s)' % repo.filtername
62 msg += ': %s\n'
62 msg += ': %s\n'
63 repo.ui.debug(msg % inst)
63 repo.ui.debug(msg % inst)
64 partial = None
64 partial = None
65 return partial
65 return partial
66
66
67 ### Nearest subset relation
67 ### Nearest subset relation
68 # Nearest subset of filter X is a filter Y so that:
68 # Nearest subset of filter X is a filter Y so that:
69 # * Y is included in X,
69 # * Y is included in X,
70 # * X - Y is as small as possible.
70 # * X - Y is as small as possible.
71 # This create and ordering used for branchmap purpose.
71 # This create and ordering used for branchmap purpose.
72 # the ordering may be partial
72 # the ordering may be partial
73 subsettable = {None: 'visible',
73 subsettable = {None: 'visible',
74 'visible': 'served',
74 'visible': 'served',
75 'served': 'immutable',
75 'served': 'immutable',
76 'immutable': 'base'}
76 'immutable': 'base'}
77
77
78 def updatecache(repo):
78 def updatecache(repo):
79 cl = repo.changelog
79 cl = repo.changelog
80 filtername = repo.filtername
80 filtername = repo.filtername
81 partial = repo._branchcaches.get(filtername)
81 partial = repo._branchcaches.get(filtername)
82
82
83 revs = []
83 revs = []
84 if partial is None or not partial.validfor(repo):
84 if partial is None or not partial.validfor(repo):
85 partial = read(repo)
85 partial = read(repo)
86 if partial is None:
86 if partial is None:
87 subsetname = subsettable.get(filtername)
87 subsetname = subsettable.get(filtername)
88 if subsetname is None:
88 if subsetname is None:
89 partial = branchcache()
89 partial = branchcache()
90 else:
90 else:
91 subset = repo.filtered(subsetname)
91 subset = repo.filtered(subsetname)
92 partial = subset.branchmap().copy()
92 partial = subset.branchmap().copy()
93 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
93 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
94 revs.extend(r for r in extrarevs if r <= partial.tiprev)
94 revs.extend(r for r in extrarevs if r <= partial.tiprev)
95 revs.extend(cl.revs(start=partial.tiprev + 1))
95 revs.extend(cl.revs(start=partial.tiprev + 1))
96 if revs:
96 if revs:
97 partial.update(repo, revs)
97 partial.update(repo, revs)
98 partial.write(repo)
98 partial.write(repo)
99
99
100 if repo._revbranchcache is not None:
100 if repo._revbranchcache is not None:
101 repo._revbranchcache.write()
101 repo._revbranchcache.write()
102
102
103 assert partial.validfor(repo), filtername
103 assert partial.validfor(repo), filtername
104 repo._branchcaches[repo.filtername] = partial
104 repo._branchcaches[repo.filtername] = partial
105
105
106 class branchcache(dict):
106 class branchcache(dict):
107 """A dict like object that hold branches heads cache.
107 """A dict like object that hold branches heads cache.
108
108
109 This cache is used to avoid costly computations to determine all the
109 This cache is used to avoid costly computations to determine all the
110 branch heads of a repo.
110 branch heads of a repo.
111
111
112 The cache is serialized on disk in the following format:
112 The cache is serialized on disk in the following format:
113
113
114 <tip hex node> <tip rev number> [optional filtered repo hex hash]
114 <tip hex node> <tip rev number> [optional filtered repo hex hash]
115 <branch head hex node> <open/closed state> <branch name>
115 <branch head hex node> <open/closed state> <branch name>
116 <branch head hex node> <open/closed state> <branch name>
116 <branch head hex node> <open/closed state> <branch name>
117 ...
117 ...
118
118
119 The first line is used to check if the cache is still valid. If the
119 The first line is used to check if the cache is still valid. If the
120 branch cache is for a filtered repo view, an optional third hash is
120 branch cache is for a filtered repo view, an optional third hash is
121 included that hashes the hashes of all filtered revisions.
121 included that hashes the hashes of all filtered revisions.
122
122
123 The open/closed state is represented by a single letter 'o' or 'c'.
123 The open/closed state is represented by a single letter 'o' or 'c'.
124 This field can be used to avoid changelog reads when determining if a
124 This field can be used to avoid changelog reads when determining if a
125 branch head closes a branch or not.
125 branch head closes a branch or not.
126 """
126 """
127
127
128 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
128 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
129 filteredhash=None, closednodes=None):
129 filteredhash=None, closednodes=None):
130 super(branchcache, self).__init__(entries)
130 super(branchcache, self).__init__(entries)
131 self.tipnode = tipnode
131 self.tipnode = tipnode
132 self.tiprev = tiprev
132 self.tiprev = tiprev
133 self.filteredhash = filteredhash
133 self.filteredhash = filteredhash
134 # closednodes is a set of nodes that close their branch. If the branch
134 # closednodes is a set of nodes that close their branch. If the branch
135 # cache has been updated, it may contain nodes that are no longer
135 # cache has been updated, it may contain nodes that are no longer
136 # heads.
136 # heads.
137 if closednodes is None:
137 if closednodes is None:
138 self._closednodes = set()
138 self._closednodes = set()
139 else:
139 else:
140 self._closednodes = closednodes
140 self._closednodes = closednodes
141
141
142 def _hashfiltered(self, repo):
142 def _hashfiltered(self, repo):
143 """build hash of revision filtered in the current cache
143 """build hash of revision filtered in the current cache
144
144
145 Tracking tipnode and tiprev is not enough to ensure validity of the
145 Tracking tipnode and tiprev is not enough to ensure validity of the
146 cache as they do not help to distinct cache that ignored various
146 cache as they do not help to distinct cache that ignored various
147 revision bellow tiprev.
147 revision bellow tiprev.
148
148
149 To detect such difference, we build a cache of all ignored revisions.
149 To detect such difference, we build a cache of all ignored revisions.
150 """
150 """
151 cl = repo.changelog
151 cl = repo.changelog
152 if not cl.filteredrevs:
152 if not cl.filteredrevs:
153 return None
153 return None
154 key = None
154 key = None
155 revs = sorted(r for r in cl.filteredrevs if r <= self.tiprev)
155 revs = sorted(r for r in cl.filteredrevs if r <= self.tiprev)
156 if revs:
156 if revs:
157 s = util.sha1()
157 s = util.sha1()
158 for rev in revs:
158 for rev in revs:
159 s.update('%s;' % rev)
159 s.update('%s;' % rev)
160 key = s.digest()
160 key = s.digest()
161 return key
161 return key
162
162
163 def validfor(self, repo):
163 def validfor(self, repo):
164 """Is the cache content valid regarding a repo
164 """Is the cache content valid regarding a repo
165
165
166 - False when cached tipnode is unknown or if we detect a strip.
166 - False when cached tipnode is unknown or if we detect a strip.
167 - True when cache is up to date or a subset of current repo."""
167 - True when cache is up to date or a subset of current repo."""
168 try:
168 try:
169 return ((self.tipnode == repo.changelog.node(self.tiprev))
169 return ((self.tipnode == repo.changelog.node(self.tiprev))
170 and (self.filteredhash == self._hashfiltered(repo)))
170 and (self.filteredhash == self._hashfiltered(repo)))
171 except IndexError:
171 except IndexError:
172 return False
172 return False
173
173
174 def _branchtip(self, heads):
174 def _branchtip(self, heads):
175 '''Return tuple with last open head in heads and false,
175 '''Return tuple with last open head in heads and false,
176 otherwise return last closed head and true.'''
176 otherwise return last closed head and true.'''
177 tip = heads[-1]
177 tip = heads[-1]
178 closed = True
178 closed = True
179 for h in reversed(heads):
179 for h in reversed(heads):
180 if h not in self._closednodes:
180 if h not in self._closednodes:
181 tip = h
181 tip = h
182 closed = False
182 closed = False
183 break
183 break
184 return tip, closed
184 return tip, closed
185
185
186 def branchtip(self, branch):
186 def branchtip(self, branch):
187 '''Return the tipmost open head on branch head, otherwise return the
187 '''Return the tipmost open head on branch head, otherwise return the
188 tipmost closed head on branch.
188 tipmost closed head on branch.
189 Raise KeyError for unknown branch.'''
189 Raise KeyError for unknown branch.'''
190 return self._branchtip(self[branch])[0]
190 return self._branchtip(self[branch])[0]
191
191
192 def branchheads(self, branch, closed=False):
192 def branchheads(self, branch, closed=False):
193 heads = self[branch]
193 heads = self[branch]
194 if not closed:
194 if not closed:
195 heads = [h for h in heads if h not in self._closednodes]
195 heads = [h for h in heads if h not in self._closednodes]
196 return heads
196 return heads
197
197
198 def iterbranches(self):
198 def iterbranches(self):
199 for bn, heads in self.iteritems():
199 for bn, heads in self.iteritems():
200 yield (bn, heads) + self._branchtip(heads)
200 yield (bn, heads) + self._branchtip(heads)
201
201
202 def copy(self):
202 def copy(self):
203 """return an deep copy of the branchcache object"""
203 """return an deep copy of the branchcache object"""
204 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
204 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
205 self._closednodes)
205 self._closednodes)
206
206
207 def write(self, repo):
207 def write(self, repo):
208 try:
208 try:
209 f = repo.vfs(_filename(repo), "w", atomictemp=True)
209 f = repo.vfs(_filename(repo), "w", atomictemp=True)
210 cachekey = [hex(self.tipnode), str(self.tiprev)]
210 cachekey = [hex(self.tipnode), str(self.tiprev)]
211 if self.filteredhash is not None:
211 if self.filteredhash is not None:
212 cachekey.append(hex(self.filteredhash))
212 cachekey.append(hex(self.filteredhash))
213 f.write(" ".join(cachekey) + '\n')
213 f.write(" ".join(cachekey) + '\n')
214 nodecount = 0
214 nodecount = 0
215 for label, nodes in sorted(self.iteritems()):
215 for label, nodes in sorted(self.iteritems()):
216 for node in nodes:
216 for node in nodes:
217 nodecount += 1
217 nodecount += 1
218 if node in self._closednodes:
218 if node in self._closednodes:
219 state = 'c'
219 state = 'c'
220 else:
220 else:
221 state = 'o'
221 state = 'o'
222 f.write("%s %s %s\n" % (hex(node), state,
222 f.write("%s %s %s\n" % (hex(node), state,
223 encoding.fromlocal(label)))
223 encoding.fromlocal(label)))
224 f.close()
224 f.close()
225 repo.ui.log('branchcache',
225 repo.ui.log('branchcache',
226 'wrote %s branch cache with %d labels and %d nodes\n',
226 'wrote %s branch cache with %d labels and %d nodes\n',
227 repo.filtername, len(self), nodecount)
227 repo.filtername, len(self), nodecount)
228 except (IOError, OSError, util.Abort), inst:
228 except (IOError, OSError, util.Abort), inst:
229 repo.ui.debug("couldn't write branch cache: %s\n" % inst)
229 repo.ui.debug("couldn't write branch cache: %s\n" % inst)
230 # Abort may be raise by read only opener
230 # Abort may be raise by read only opener
231 pass
231 pass
232
232
233 def update(self, repo, revgen):
233 def update(self, repo, revgen):
234 """Given a branchhead cache, self, that may have extra nodes or be
234 """Given a branchhead cache, self, that may have extra nodes or be
235 missing heads, and a generator of nodes that are strictly a superset of
235 missing heads, and a generator of nodes that are strictly a superset of
236 heads missing, this function updates self to be correct.
236 heads missing, this function updates self to be correct.
237 """
237 """
238 starttime = time.time()
238 starttime = time.time()
239 cl = repo.changelog
239 cl = repo.changelog
240 # collect new branch entries
240 # collect new branch entries
241 newbranches = {}
241 newbranches = {}
242 getbranchinfo = repo.revbranchcache().branchinfo
242 getbranchinfo = repo.revbranchcache().branchinfo
243 for r in revgen:
243 for r in revgen:
244 branch, closesbranch = getbranchinfo(r)
244 branch, closesbranch = getbranchinfo(r)
245 newbranches.setdefault(branch, []).append(r)
245 newbranches.setdefault(branch, []).append(r)
246 if closesbranch:
246 if closesbranch:
247 self._closednodes.add(cl.node(r))
247 self._closednodes.add(cl.node(r))
248
248
249 # fetch current topological heads to speed up filtering
249 # fetch current topological heads to speed up filtering
250 topoheads = set(cl.headrevs())
250 topoheads = set(cl.headrevs())
251
251
252 # if older branchheads are reachable from new ones, they aren't
252 # if older branchheads are reachable from new ones, they aren't
253 # really branchheads. Note checking parents is insufficient:
253 # really branchheads. Note checking parents is insufficient:
254 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
254 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
255 for branch, newheadrevs in newbranches.iteritems():
255 for branch, newheadrevs in newbranches.iteritems():
256 bheads = self.setdefault(branch, [])
256 bheads = self.setdefault(branch, [])
257 bheadset = set(cl.rev(node) for node in bheads)
257 bheadset = set(cl.rev(node) for node in bheads)
258
258
259 # This have been tested True on all internal usage of this function.
259 # This have been tested True on all internal usage of this function.
260 # run it again in case of doubt
260 # run it again in case of doubt
261 # assert not (set(bheadrevs) & set(newheadrevs))
261 # assert not (set(bheadrevs) & set(newheadrevs))
262 newheadrevs.sort()
262 newheadrevs.sort()
263 bheadset.update(newheadrevs)
263 bheadset.update(newheadrevs)
264
264
265 # This prunes out two kinds of heads - heads that are superseded by
265 # This prunes out two kinds of heads - heads that are superseded by
266 # a head in newheadrevs, and newheadrevs that are not heads because
266 # a head in newheadrevs, and newheadrevs that are not heads because
267 # an existing head is their descendant.
267 # an existing head is their descendant.
268 uncertain = bheadset - topoheads
268 uncertain = bheadset - topoheads
269 if uncertain:
269 if uncertain:
270 floorrev = min(uncertain)
270 floorrev = min(uncertain)
271 ancestors = set(cl.ancestors(newheadrevs, floorrev))
271 ancestors = set(cl.ancestors(newheadrevs, floorrev))
272 bheadset -= ancestors
272 bheadset -= ancestors
273 bheadrevs = sorted(bheadset)
273 bheadrevs = sorted(bheadset)
274 self[branch] = [cl.node(rev) for rev in bheadrevs]
274 self[branch] = [cl.node(rev) for rev in bheadrevs]
275 tiprev = bheadrevs[-1]
275 tiprev = bheadrevs[-1]
276 if tiprev > self.tiprev:
276 if tiprev > self.tiprev:
277 self.tipnode = cl.node(tiprev)
277 self.tipnode = cl.node(tiprev)
278 self.tiprev = tiprev
278 self.tiprev = tiprev
279
279
280 if not self.validfor(repo):
280 if not self.validfor(repo):
281 # cache key are not valid anymore
281 # cache key are not valid anymore
282 self.tipnode = nullid
282 self.tipnode = nullid
283 self.tiprev = nullrev
283 self.tiprev = nullrev
284 for heads in self.values():
284 for heads in self.values():
285 tiprev = max(cl.rev(node) for node in heads)
285 tiprev = max(cl.rev(node) for node in heads)
286 if tiprev > self.tiprev:
286 if tiprev > self.tiprev:
287 self.tipnode = cl.node(tiprev)
287 self.tipnode = cl.node(tiprev)
288 self.tiprev = tiprev
288 self.tiprev = tiprev
289 self.filteredhash = self._hashfiltered(repo)
289 self.filteredhash = self._hashfiltered(repo)
290
290
291 duration = time.time() - starttime
291 duration = time.time() - starttime
292 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
292 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
293 repo.filtername, duration)
293 repo.filtername, duration)
294
294
295 # Revision branch info cache
295 # Revision branch info cache
296
296
297 _rbcversion = '-v1'
297 _rbcversion = '-v1'
298 _rbcnames = 'cache/rbc-names' + _rbcversion
298 _rbcnames = 'cache/rbc-names' + _rbcversion
299 _rbcrevs = 'cache/rbc-revs' + _rbcversion
299 _rbcrevs = 'cache/rbc-revs' + _rbcversion
300 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
300 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
301 _rbcrecfmt = '>4sI'
301 _rbcrecfmt = '>4sI'
302 _rbcrecsize = calcsize(_rbcrecfmt)
302 _rbcrecsize = calcsize(_rbcrecfmt)
303 _rbcnodelen = 4
303 _rbcnodelen = 4
304 _rbcbranchidxmask = 0x7fffffff
304 _rbcbranchidxmask = 0x7fffffff
305 _rbccloseflag = 0x80000000
305 _rbccloseflag = 0x80000000
306
306
307 class revbranchcache(object):
307 class revbranchcache(object):
308 """Persistent cache, mapping from revision number to branch name and close.
308 """Persistent cache, mapping from revision number to branch name and close.
309 This is a low level cache, independent of filtering.
309 This is a low level cache, independent of filtering.
310
310
311 Branch names are stored in rbc-names in internal encoding separated by 0.
311 Branch names are stored in rbc-names in internal encoding separated by 0.
312 rbc-names is append-only, and each branch name is only stored once and will
312 rbc-names is append-only, and each branch name is only stored once and will
313 thus have a unique index.
313 thus have a unique index.
314
314
315 The branch info for each revision is stored in rbc-revs as constant size
315 The branch info for each revision is stored in rbc-revs as constant size
316 records. The whole file is read into memory, but it is only 'parsed' on
316 records. The whole file is read into memory, but it is only 'parsed' on
317 demand. The file is usually append-only but will be truncated if repo
317 demand. The file is usually append-only but will be truncated if repo
318 modification is detected.
318 modification is detected.
319 The record for each revision contains the first 4 bytes of the
319 The record for each revision contains the first 4 bytes of the
320 corresponding node hash, and the record is only used if it still matches.
320 corresponding node hash, and the record is only used if it still matches.
321 Even a completely trashed rbc-revs fill thus still give the right result
321 Even a completely trashed rbc-revs fill thus still give the right result
322 while converging towards full recovery ... assuming no incorrectly matching
322 while converging towards full recovery ... assuming no incorrectly matching
323 node hashes.
323 node hashes.
324 The record also contains 4 bytes where 31 bits contains the index of the
324 The record also contains 4 bytes where 31 bits contains the index of the
325 branch and the last bit indicate that it is a branch close commit.
325 branch and the last bit indicate that it is a branch close commit.
326 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
326 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
327 and will grow with it but be 1/8th of its size.
327 and will grow with it but be 1/8th of its size.
328 """
328 """
329
329
330 def __init__(self, repo, readonly=True):
330 def __init__(self, repo, readonly=True):
331 assert repo.filtername is None
331 assert repo.filtername is None
332 self._repo = repo
332 self._repo = repo
333 self._names = [] # branch names in local encoding with static index
333 self._names = [] # branch names in local encoding with static index
334 self._rbcrevs = array('c') # structs of type _rbcrecfmt
334 self._rbcrevs = array('c') # structs of type _rbcrecfmt
335 self._rbcsnameslen = 0
335 self._rbcsnameslen = 0
336 try:
336 try:
337 bndata = repo.vfs.read(_rbcnames)
337 bndata = repo.vfs.read(_rbcnames)
338 self._rbcsnameslen = len(bndata) # for verification before writing
338 self._rbcsnameslen = len(bndata) # for verification before writing
339 self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
339 self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
340 except (IOError, OSError), inst:
340 except (IOError, OSError), inst:
341 repo.ui.debug("couldn't read revision branch cache names: %s\n" %
341 repo.ui.debug("couldn't read revision branch cache names: %s\n" %
342 inst)
342 inst)
343 if readonly:
343 if readonly:
344 # don't try to use cache - fall back to the slow path
344 # don't try to use cache - fall back to the slow path
345 self.branchinfo = self._branchinfo
345 self.branchinfo = self._branchinfo
346
346
347 if self._names:
347 if self._names:
348 try:
348 try:
349 data = repo.vfs.read(_rbcrevs)
349 data = repo.vfs.read(_rbcrevs)
350 self._rbcrevs.fromstring(data)
350 self._rbcrevs.fromstring(data)
351 except (IOError, OSError), inst:
351 except (IOError, OSError), inst:
352 repo.ui.debug("couldn't read revision branch cache: %s\n" %
352 repo.ui.debug("couldn't read revision branch cache: %s\n" %
353 inst)
353 inst)
354 # remember number of good records on disk
354 # remember number of good records on disk
355 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
355 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
356 len(repo.changelog))
356 len(repo.changelog))
357 if self._rbcrevslen == 0:
357 if self._rbcrevslen == 0:
358 self._names = []
358 self._names = []
359 self._rbcnamescount = len(self._names) # number of good names on disk
359 self._rbcnamescount = len(self._names) # number of good names on disk
360 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
360 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
361
361
362 def branchinfo(self, rev):
362 def branchinfo(self, rev):
363 """Return branch name and close flag for rev, using and updating
363 """Return branch name and close flag for rev, using and updating
364 persistent cache."""
364 persistent cache."""
365 changelog = self._repo.changelog
365 changelog = self._repo.changelog
366 rbcrevidx = rev * _rbcrecsize
366 rbcrevidx = rev * _rbcrecsize
367
367
368 # if requested rev is missing, add and populate all missing revs
368 # if requested rev is missing, add and populate all missing revs
369 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
369 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
370 self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize -
370 self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize -
371 len(self._rbcrevs)))
371 len(self._rbcrevs)))
372
372
373 # fast path: extract data from cache, use it if node is matching
373 # fast path: extract data from cache, use it if node is matching
374 reponode = changelog.node(rev)[:_rbcnodelen]
374 reponode = changelog.node(rev)[:_rbcnodelen]
375 cachenode, branchidx = unpack(
375 cachenode, branchidx = unpack(
376 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
376 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
377 close = bool(branchidx & _rbccloseflag)
377 close = bool(branchidx & _rbccloseflag)
378 if close:
378 if close:
379 branchidx &= _rbcbranchidxmask
379 branchidx &= _rbcbranchidxmask
380 if cachenode == '\0\0\0\0':
380 if cachenode == '\0\0\0\0':
381 pass
381 pass
382 elif cachenode == reponode:
382 elif cachenode == reponode:
383 return self._names[branchidx], close
383 return self._names[branchidx], close
384 else:
384 else:
385 # rev/node map has changed, invalidate the cache from here up
385 # rev/node map has changed, invalidate the cache from here up
386 truncate = rbcrevidx + _rbcrecsize
386 truncate = rbcrevidx + _rbcrecsize
387 del self._rbcrevs[truncate:]
387 del self._rbcrevs[truncate:]
388 self._rbcrevslen = min(self._rbcrevslen, truncate)
388 self._rbcrevslen = min(self._rbcrevslen, truncate)
389
389
390 # fall back to slow path and make sure it will be written to disk
390 # fall back to slow path and make sure it will be written to disk
391 return self._branchinfo(rev)
391 return self._branchinfo(rev)
392
392
393 def _branchinfo(self, rev):
393 def _branchinfo(self, rev):
394 """Retrieve branch info from changelog and update _rbcrevs"""
394 """Retrieve branch info from changelog and update _rbcrevs"""
395 changelog = self._repo.changelog
395 changelog = self._repo.changelog
396 b, close = changelog.branchinfo(rev)
396 b, close = changelog.branchinfo(rev)
397 if b in self._namesreverse:
397 if b in self._namesreverse:
398 branchidx = self._namesreverse[b]
398 branchidx = self._namesreverse[b]
399 else:
399 else:
400 branchidx = len(self._names)
400 branchidx = len(self._names)
401 self._names.append(b)
401 self._names.append(b)
402 self._namesreverse[b] = branchidx
402 self._namesreverse[b] = branchidx
403 reponode = changelog.node(rev)
403 reponode = changelog.node(rev)
404 if close:
404 if close:
405 branchidx |= _rbccloseflag
405 branchidx |= _rbccloseflag
406 self._setcachedata(rev, reponode, branchidx)
406 self._setcachedata(rev, reponode, branchidx)
407 return b, close
407 return b, close
408
408
409 def _setcachedata(self, rev, node, branchidx):
409 def _setcachedata(self, rev, node, branchidx):
410 """Writes the node's branch data to the in-memory cache data."""
410 """Writes the node's branch data to the in-memory cache data."""
411 rbcrevidx = rev * _rbcrecsize
411 rbcrevidx = rev * _rbcrecsize
412 rec = array('c')
412 rec = array('c')
413 rec.fromstring(pack(_rbcrecfmt, node, branchidx))
413 rec.fromstring(pack(_rbcrecfmt, node, branchidx))
414 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
414 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
415 self._rbcrevslen = min(self._rbcrevslen, rev)
415 self._rbcrevslen = min(self._rbcrevslen, rev)
416
416
417 def write(self):
417 tr = self._repo.currenttransaction()
418 if tr:
419 tr.addfinalize('write-revbranchcache', self.write)
420
421 def write(self, tr=None):
418 """Save branch cache if it is dirty."""
422 """Save branch cache if it is dirty."""
419 repo = self._repo
423 repo = self._repo
420 if self._rbcnamescount < len(self._names):
424 if self._rbcnamescount < len(self._names):
421 try:
425 try:
422 if self._rbcnamescount != 0:
426 if self._rbcnamescount != 0:
423 f = repo.vfs.open(_rbcnames, 'ab')
427 f = repo.vfs.open(_rbcnames, 'ab')
424 if f.tell() == self._rbcsnameslen:
428 if f.tell() == self._rbcsnameslen:
425 f.write('\0')
429 f.write('\0')
426 else:
430 else:
427 f.close()
431 f.close()
428 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
432 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
429 self._rbcnamescount = 0
433 self._rbcnamescount = 0
430 self._rbcrevslen = 0
434 self._rbcrevslen = 0
431 if self._rbcnamescount == 0:
435 if self._rbcnamescount == 0:
432 f = repo.vfs.open(_rbcnames, 'wb')
436 f = repo.vfs.open(_rbcnames, 'wb')
433 f.write('\0'.join(encoding.fromlocal(b)
437 f.write('\0'.join(encoding.fromlocal(b)
434 for b in self._names[self._rbcnamescount:]))
438 for b in self._names[self._rbcnamescount:]))
435 self._rbcsnameslen = f.tell()
439 self._rbcsnameslen = f.tell()
436 f.close()
440 f.close()
437 except (IOError, OSError, util.Abort), inst:
441 except (IOError, OSError, util.Abort), inst:
438 repo.ui.debug("couldn't write revision branch cache names: "
442 repo.ui.debug("couldn't write revision branch cache names: "
439 "%s\n" % inst)
443 "%s\n" % inst)
440 return
444 return
441 self._rbcnamescount = len(self._names)
445 self._rbcnamescount = len(self._names)
442
446
443 start = self._rbcrevslen * _rbcrecsize
447 start = self._rbcrevslen * _rbcrecsize
444 if start != len(self._rbcrevs):
448 if start != len(self._rbcrevs):
445 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
449 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
446 try:
450 try:
447 f = repo.vfs.open(_rbcrevs, 'ab')
451 f = repo.vfs.open(_rbcrevs, 'ab')
448 if f.tell() != start:
452 if f.tell() != start:
449 repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
453 repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
450 f.seek(start)
454 f.seek(start)
451 f.truncate()
455 f.truncate()
452 end = revs * _rbcrecsize
456 end = revs * _rbcrecsize
453 f.write(self._rbcrevs[start:end])
457 f.write(self._rbcrevs[start:end])
454 f.close()
458 f.close()
455 except (IOError, OSError, util.Abort), inst:
459 except (IOError, OSError, util.Abort), inst:
456 repo.ui.debug("couldn't write revision branch cache: %s\n" %
460 repo.ui.debug("couldn't write revision branch cache: %s\n" %
457 inst)
461 inst)
458 return
462 return
459 self._rbcrevslen = revs
463 self._rbcrevslen = revs
@@ -1,167 +1,168 b''
1 # statichttprepo.py - simple http repository class for mercurial
1 # statichttprepo.py - simple http repository class for mercurial
2 #
2 #
3 # This provides read-only repo access to repositories exported via static http
3 # This provides read-only repo access to repositories exported via static http
4 #
4 #
5 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 from i18n import _
10 from i18n import _
11 import changelog, byterange, url, error, namespaces
11 import changelog, byterange, url, error, namespaces
12 import localrepo, manifest, util, scmutil, store
12 import localrepo, manifest, util, scmutil, store
13 import urllib, urllib2, errno, os
13 import urllib, urllib2, errno, os
14
14
15 class httprangereader(object):
15 class httprangereader(object):
16 def __init__(self, url, opener):
16 def __init__(self, url, opener):
17 # we assume opener has HTTPRangeHandler
17 # we assume opener has HTTPRangeHandler
18 self.url = url
18 self.url = url
19 self.pos = 0
19 self.pos = 0
20 self.opener = opener
20 self.opener = opener
21 self.name = url
21 self.name = url
22 def seek(self, pos):
22 def seek(self, pos):
23 self.pos = pos
23 self.pos = pos
24 def read(self, bytes=None):
24 def read(self, bytes=None):
25 req = urllib2.Request(self.url)
25 req = urllib2.Request(self.url)
26 end = ''
26 end = ''
27 if bytes:
27 if bytes:
28 end = self.pos + bytes - 1
28 end = self.pos + bytes - 1
29 if self.pos or end:
29 if self.pos or end:
30 req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))
30 req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))
31
31
32 try:
32 try:
33 f = self.opener.open(req)
33 f = self.opener.open(req)
34 data = f.read()
34 data = f.read()
35 # Python 2.6+ defines a getcode() function, and 2.4 and
35 # Python 2.6+ defines a getcode() function, and 2.4 and
36 # 2.5 appear to always have an undocumented code attribute
36 # 2.5 appear to always have an undocumented code attribute
37 # set. If we can't read either of those, fall back to 206
37 # set. If we can't read either of those, fall back to 206
38 # and hope for the best.
38 # and hope for the best.
39 code = getattr(f, 'getcode', lambda : getattr(f, 'code', 206))()
39 code = getattr(f, 'getcode', lambda : getattr(f, 'code', 206))()
40 except urllib2.HTTPError, inst:
40 except urllib2.HTTPError, inst:
41 num = inst.code == 404 and errno.ENOENT or None
41 num = inst.code == 404 and errno.ENOENT or None
42 raise IOError(num, inst)
42 raise IOError(num, inst)
43 except urllib2.URLError, inst:
43 except urllib2.URLError, inst:
44 raise IOError(None, inst.reason[1])
44 raise IOError(None, inst.reason[1])
45
45
46 if code == 200:
46 if code == 200:
47 # HTTPRangeHandler does nothing if remote does not support
47 # HTTPRangeHandler does nothing if remote does not support
48 # Range headers and returns the full entity. Let's slice it.
48 # Range headers and returns the full entity. Let's slice it.
49 if bytes:
49 if bytes:
50 data = data[self.pos:self.pos + bytes]
50 data = data[self.pos:self.pos + bytes]
51 else:
51 else:
52 data = data[self.pos:]
52 data = data[self.pos:]
53 elif bytes:
53 elif bytes:
54 data = data[:bytes]
54 data = data[:bytes]
55 self.pos += len(data)
55 self.pos += len(data)
56 return data
56 return data
57 def readlines(self):
57 def readlines(self):
58 return self.read().splitlines(True)
58 return self.read().splitlines(True)
59 def __iter__(self):
59 def __iter__(self):
60 return iter(self.readlines())
60 return iter(self.readlines())
61 def close(self):
61 def close(self):
62 pass
62 pass
63
63
64 def build_opener(ui, authinfo):
64 def build_opener(ui, authinfo):
65 # urllib cannot handle URLs with embedded user or passwd
65 # urllib cannot handle URLs with embedded user or passwd
66 urlopener = url.opener(ui, authinfo)
66 urlopener = url.opener(ui, authinfo)
67 urlopener.add_handler(byterange.HTTPRangeHandler())
67 urlopener.add_handler(byterange.HTTPRangeHandler())
68
68
69 class statichttpvfs(scmutil.abstractvfs):
69 class statichttpvfs(scmutil.abstractvfs):
70 def __init__(self, base):
70 def __init__(self, base):
71 self.base = base
71 self.base = base
72
72
73 def __call__(self, path, mode='r', *args, **kw):
73 def __call__(self, path, mode='r', *args, **kw):
74 if mode not in ('r', 'rb'):
74 if mode not in ('r', 'rb'):
75 raise IOError('Permission denied')
75 raise IOError('Permission denied')
76 f = "/".join((self.base, urllib.quote(path)))
76 f = "/".join((self.base, urllib.quote(path)))
77 return httprangereader(f, urlopener)
77 return httprangereader(f, urlopener)
78
78
79 def join(self, path):
79 def join(self, path):
80 if path:
80 if path:
81 return os.path.join(self.base, path)
81 return os.path.join(self.base, path)
82 else:
82 else:
83 return self.base
83 return self.base
84
84
85 return statichttpvfs
85 return statichttpvfs
86
86
87 class statichttppeer(localrepo.localpeer):
87 class statichttppeer(localrepo.localpeer):
88 def local(self):
88 def local(self):
89 return None
89 return None
90 def canpush(self):
90 def canpush(self):
91 return False
91 return False
92
92
93 class statichttprepository(localrepo.localrepository):
93 class statichttprepository(localrepo.localrepository):
94 supported = localrepo.localrepository._basesupported
94 supported = localrepo.localrepository._basesupported
95
95
96 def __init__(self, ui, path):
96 def __init__(self, ui, path):
97 self._url = path
97 self._url = path
98 self.ui = ui
98 self.ui = ui
99
99
100 self.root = path
100 self.root = path
101 u = util.url(path.rstrip('/') + "/.hg")
101 u = util.url(path.rstrip('/') + "/.hg")
102 self.path, authinfo = u.authinfo()
102 self.path, authinfo = u.authinfo()
103
103
104 opener = build_opener(ui, authinfo)
104 opener = build_opener(ui, authinfo)
105 self.opener = opener(self.path)
105 self.opener = opener(self.path)
106 self.vfs = self.opener
106 self.vfs = self.opener
107 self._phasedefaults = []
107 self._phasedefaults = []
108
108
109 self.names = namespaces.namespaces()
109 self.names = namespaces.namespaces()
110
110
111 try:
111 try:
112 requirements = scmutil.readrequires(self.vfs, self.supported)
112 requirements = scmutil.readrequires(self.vfs, self.supported)
113 except IOError, inst:
113 except IOError, inst:
114 if inst.errno != errno.ENOENT:
114 if inst.errno != errno.ENOENT:
115 raise
115 raise
116 requirements = set()
116 requirements = set()
117
117
118 # check if it is a non-empty old-style repository
118 # check if it is a non-empty old-style repository
119 try:
119 try:
120 fp = self.vfs("00changelog.i")
120 fp = self.vfs("00changelog.i")
121 fp.read(1)
121 fp.read(1)
122 fp.close()
122 fp.close()
123 except IOError, inst:
123 except IOError, inst:
124 if inst.errno != errno.ENOENT:
124 if inst.errno != errno.ENOENT:
125 raise
125 raise
126 # we do not care about empty old-style repositories here
126 # we do not care about empty old-style repositories here
127 msg = _("'%s' does not appear to be an hg repository") % path
127 msg = _("'%s' does not appear to be an hg repository") % path
128 raise error.RepoError(msg)
128 raise error.RepoError(msg)
129
129
130 # setup store
130 # setup store
131 self.store = store.store(requirements, self.path, opener)
131 self.store = store.store(requirements, self.path, opener)
132 self.spath = self.store.path
132 self.spath = self.store.path
133 self.svfs = self.store.opener
133 self.svfs = self.store.opener
134 self.sopener = self.svfs
134 self.sopener = self.svfs
135 self.sjoin = self.store.join
135 self.sjoin = self.store.join
136 self._filecache = {}
136 self._filecache = {}
137 self.requirements = requirements
137 self.requirements = requirements
138
138
139 self.manifest = manifest.manifest(self.svfs)
139 self.manifest = manifest.manifest(self.svfs)
140 self.changelog = changelog.changelog(self.svfs)
140 self.changelog = changelog.changelog(self.svfs)
141 self._tags = None
141 self._tags = None
142 self.nodetagscache = None
142 self.nodetagscache = None
143 self._branchcaches = {}
143 self._branchcaches = {}
144 self._revbranchcache = None
144 self._revbranchcache = None
145 self.encodepats = None
145 self.encodepats = None
146 self.decodepats = None
146 self.decodepats = None
147 self._transref = None
147
148
148 def _restrictcapabilities(self, caps):
149 def _restrictcapabilities(self, caps):
149 caps = super(statichttprepository, self)._restrictcapabilities(caps)
150 caps = super(statichttprepository, self)._restrictcapabilities(caps)
150 return caps.difference(["pushkey"])
151 return caps.difference(["pushkey"])
151
152
152 def url(self):
153 def url(self):
153 return self._url
154 return self._url
154
155
155 def local(self):
156 def local(self):
156 return False
157 return False
157
158
158 def peer(self):
159 def peer(self):
159 return statichttppeer(self)
160 return statichttppeer(self)
160
161
161 def lock(self, wait=True):
162 def lock(self, wait=True):
162 raise util.Abort(_('cannot lock static-http repository'))
163 raise util.Abort(_('cannot lock static-http repository'))
163
164
164 def instance(ui, path, create):
165 def instance(ui, path, create):
165 if create:
166 if create:
166 raise util.Abort(_('cannot create new static-http repository'))
167 raise util.Abort(_('cannot create new static-http repository'))
167 return statichttprepository(ui, path[7:])
168 return statichttprepository(ui, path[7:])
General Comments 0
You need to be logged in to leave comments. Login now