##// END OF EJS Templates
branchmap: add documentation on the branchcache on-disk format
Brodie Rao -
r20181:b9515fb9 default
parent child Browse files
Show More
@@ -1,221 +1,236
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from node import bin, hex, nullid, nullrev
8 from node import bin, hex, nullid, nullrev
9 import encoding
9 import encoding
10 import util
10 import util
11
11
12 def _filename(repo):
12 def _filename(repo):
13 """name of a branchcache file for a given repo or repoview"""
13 """name of a branchcache file for a given repo or repoview"""
14 filename = "cache/branchheads"
14 filename = "cache/branchheads"
15 if repo.filtername:
15 if repo.filtername:
16 filename = '%s-%s' % (filename, repo.filtername)
16 filename = '%s-%s' % (filename, repo.filtername)
17 return filename
17 return filename
18
18
19 def read(repo):
19 def read(repo):
20 try:
20 try:
21 f = repo.opener(_filename(repo))
21 f = repo.opener(_filename(repo))
22 lines = f.read().split('\n')
22 lines = f.read().split('\n')
23 f.close()
23 f.close()
24 except (IOError, OSError):
24 except (IOError, OSError):
25 return None
25 return None
26
26
27 try:
27 try:
28 cachekey = lines.pop(0).split(" ", 2)
28 cachekey = lines.pop(0).split(" ", 2)
29 last, lrev = cachekey[:2]
29 last, lrev = cachekey[:2]
30 last, lrev = bin(last), int(lrev)
30 last, lrev = bin(last), int(lrev)
31 filteredhash = None
31 filteredhash = None
32 if len(cachekey) > 2:
32 if len(cachekey) > 2:
33 filteredhash = bin(cachekey[2])
33 filteredhash = bin(cachekey[2])
34 partial = branchcache(tipnode=last, tiprev=lrev,
34 partial = branchcache(tipnode=last, tiprev=lrev,
35 filteredhash=filteredhash)
35 filteredhash=filteredhash)
36 if not partial.validfor(repo):
36 if not partial.validfor(repo):
37 # invalidate the cache
37 # invalidate the cache
38 raise ValueError('tip differs')
38 raise ValueError('tip differs')
39 for l in lines:
39 for l in lines:
40 if not l:
40 if not l:
41 continue
41 continue
42 node, label = l.split(" ", 1)
42 node, label = l.split(" ", 1)
43 label = encoding.tolocal(label.strip())
43 label = encoding.tolocal(label.strip())
44 if not node in repo:
44 if not node in repo:
45 raise ValueError('node %s does not exist' % node)
45 raise ValueError('node %s does not exist' % node)
46 partial.setdefault(label, []).append(bin(node))
46 partial.setdefault(label, []).append(bin(node))
47 except KeyboardInterrupt:
47 except KeyboardInterrupt:
48 raise
48 raise
49 except Exception, inst:
49 except Exception, inst:
50 if repo.ui.debugflag:
50 if repo.ui.debugflag:
51 msg = 'invalid branchheads cache'
51 msg = 'invalid branchheads cache'
52 if repo.filtername is not None:
52 if repo.filtername is not None:
53 msg += ' (%s)' % repo.filtername
53 msg += ' (%s)' % repo.filtername
54 msg += ': %s\n'
54 msg += ': %s\n'
55 repo.ui.warn(msg % inst)
55 repo.ui.warn(msg % inst)
56 partial = None
56 partial = None
57 return partial
57 return partial
58
58
59
59
60
60
### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
subsettable = {None: 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}
71
71
def updatecache(repo):
    """Bring the branch heads cache of *repo* (a repo or repoview) up to date.

    The in-memory cache is reused when still valid; otherwise we fall
    back to the on-disk cache, then to a copy of the nearest subset's
    branchmap, and finally to an empty cache.  Any revisions not yet
    covered are folded in, the cache is written back to disk, and the
    result is stored on repo._branchcaches.
    """
    cl = repo.changelog
    filtername = repo.filtername
    bcache = repo._branchcaches.get(filtername)

    revs = []
    if bcache is None or not bcache.validfor(repo):
        bcache = read(repo)
        if bcache is None:
            nearest = subsettable.get(filtername)
            if nearest is None:
                # no known subset for this filter: start from scratch
                bcache = branchcache()
            else:
                # seed from the branchmap of the nearest subset view
                subrepo = repo.filtered(nearest)
                bcache = subrepo.branchmap().copy()
                # revisions visible here but filtered in the subset must
                # be (re)processed, even if at or below the cached tiprev
                extra = subrepo.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extra if r <= bcache.tiprev)
    revs.extend(cl.revs(start=bcache.tiprev + 1))
    if revs:
        bcache.update(repo, revs)
        bcache.write(repo)
    assert bcache.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = bcache
95
95
class branchcache(dict):
    """A dict like object that hold branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <branch name>
    <branch head hex node> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None):
        super(branchcache, self).__init__(entries)
        # cache key: node and rev of the (possibly filtered) tip at the
        # time this cache was computed
        self.tipnode = tipnode
        self.tiprev = tiprev
        # hash of the filtered revisions at or below tiprev; None when
        # the cache was built on an unfiltered view
        self.filteredhash = filteredhash

    def _hashfiltered(self, repo):
        """build hash of revision filtered in the current cache

        Tracking tipnode and tiprev is not enough to ensure validity of the
        cache as they do not help to distinguish caches that ignored various
        revisions below tiprev.

        To detect such difference, we build a cache of all ignored revisions.
        """
        cl = repo.changelog
        if not cl.filteredrevs:
            # unfiltered view: nothing to hash
            return None
        key = None
        revs = sorted(r for r in cl.filteredrevs if r <= self.tiprev)
        if revs:
            s = util.sha1()
            for rev in revs:
                s.update('%s;' % rev)
            key = s.digest()
        return key

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            # both the tip node/rev pair and the filtered-revision hash
            # must match for the cache to be reusable
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == self._hashfiltered(repo)))
        except IndexError:
            # tiprev no longer exists (e.g. after a strip)
            return False

    def copy(self):
        """return a deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash)

    def write(self, repo):
        """Serialize this cache to disk in the documented on-disk format.

        Writing is best-effort: failures (including read-only openers)
        are silently ignored.
        """
        try:
            f = repo.opener(_filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), str(self.tiprev)]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    f.write("%s %s\n" % (hex(node), encoding.fromlocal(label)))
            f.close()
        except (IOError, OSError, util.Abort):
            # Abort may be raised by a read-only opener
            pass

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are at least a superset of
        heads missing, this function updates self to be correct.
        """
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranch = cl.branch
        for r in revgen:
            newbranches.setdefault(getbranch(r), []).append(cl.node(r))
        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newnodes in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            # Remove candidate heads that no longer are in the repo (e.g., as
            # the result of a strip that just happened). Avoid using 'node in
            # self' here because that dives down into branchcache code somewhat
            # recursively.
            bheadrevs = [cl.rev(node) for node in bheads
                         if cl.hasnode(node)]
            newheadrevs = [cl.rev(node) for node in newnodes
                           if cl.hasnode(node)]
            ctxisnew = bheadrevs and min(newheadrevs) > max(bheadrevs)
            # Remove duplicates - nodes that are in newheadrevs and are already
            # in bheadrevs. This can happen if you strip a node whose parent
            # was already a head (because they're on different branches).
            bheadrevs = sorted(set(bheadrevs).union(newheadrevs))

            # Starting from tip means fewer passes over reachable. If we know
            # the new candidates are not ancestors of existing heads, we don't
            # have to examine ancestors of existing heads
            if ctxisnew:
                iterrevs = sorted(newheadrevs)
            else:
                iterrevs = list(bheadrevs)

            # This loop prunes out two kinds of heads - heads that are
            # superseded by a head in newheadrevs, and newheadrevs that are not
            # heads because an existing head is their descendant.
            while iterrevs:
                latest = iterrevs.pop()
                if latest not in bheadrevs:
                    continue
                ancestors = set(cl.ancestors([latest],
                                             bheadrevs[0]))
                if ancestors:
                    bheadrevs = [b for b in bheadrevs if b not in ancestors]
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = max(bheadrevs)
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore: recompute the tip from the
            # heads we actually hold
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = self._hashfiltered(repo)
General Comments 0
You need to be logged in to leave comments. Login now