##// END OF EJS Templates
branchmap: add documentation on the branchcache on-disk format
Brodie Rao -
r20181:b9515fb9 default
parent child Browse files
Show More
@@ -1,221 +1,236
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import bin, hex, nullid, nullrev
9 9 import encoding
10 10 import util
11 11
12 12 def _filename(repo):
13 13 """name of a branchcache file for a given repo or repoview"""
14 14 filename = "cache/branchheads"
15 15 if repo.filtername:
16 16 filename = '%s-%s' % (filename, repo.filtername)
17 17 return filename
18 18
19 19 def read(repo):
20 20 try:
21 21 f = repo.opener(_filename(repo))
22 22 lines = f.read().split('\n')
23 23 f.close()
24 24 except (IOError, OSError):
25 25 return None
26 26
27 27 try:
28 28 cachekey = lines.pop(0).split(" ", 2)
29 29 last, lrev = cachekey[:2]
30 30 last, lrev = bin(last), int(lrev)
31 31 filteredhash = None
32 32 if len(cachekey) > 2:
33 33 filteredhash = bin(cachekey[2])
34 34 partial = branchcache(tipnode=last, tiprev=lrev,
35 35 filteredhash=filteredhash)
36 36 if not partial.validfor(repo):
37 37 # invalidate the cache
38 38 raise ValueError('tip differs')
39 39 for l in lines:
40 40 if not l:
41 41 continue
42 42 node, label = l.split(" ", 1)
43 43 label = encoding.tolocal(label.strip())
44 44 if not node in repo:
45 45 raise ValueError('node %s does not exist' % node)
46 46 partial.setdefault(label, []).append(bin(node))
47 47 except KeyboardInterrupt:
48 48 raise
49 49 except Exception, inst:
50 50 if repo.ui.debugflag:
51 51 msg = 'invalid branchheads cache'
52 52 if repo.filtername is not None:
53 53 msg += ' (%s)' % repo.filtername
54 54 msg += ': %s\n'
55 55 repo.ui.warn(msg % inst)
56 56 partial = None
57 57 return partial
58 58
59 59
60 60
61 61 ### Nearest subset relation
62 62 # Nearest subset of filter X is a filter Y so that:
63 63 # * Y is included in X,
64 64 # * X - Y is as small as possible.
65 65 # This create and ordering used for branchmap purpose.
66 66 # the ordering may be partial
67 67 subsettable = {None: 'visible',
68 68 'visible': 'served',
69 69 'served': 'immutable',
70 70 'immutable': 'base'}
71 71
72 72 def updatecache(repo):
73 73 cl = repo.changelog
74 74 filtername = repo.filtername
75 75 partial = repo._branchcaches.get(filtername)
76 76
77 77 revs = []
78 78 if partial is None or not partial.validfor(repo):
79 79 partial = read(repo)
80 80 if partial is None:
81 81 subsetname = subsettable.get(filtername)
82 82 if subsetname is None:
83 83 partial = branchcache()
84 84 else:
85 85 subset = repo.filtered(subsetname)
86 86 partial = subset.branchmap().copy()
87 87 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
88 88 revs.extend(r for r in extrarevs if r <= partial.tiprev)
89 89 revs.extend(cl.revs(start=partial.tiprev + 1))
90 90 if revs:
91 91 partial.update(repo, revs)
92 92 partial.write(repo)
93 93 assert partial.validfor(repo), filtername
94 94 repo._branchcaches[repo.filtername] = partial
95 95
96 96 class branchcache(dict):
97 """A dict like object that hold branches heads cache"""
97 """A dict like object that hold branches heads cache.
98
99 This cache is used to avoid costly computations to determine all the
100 branch heads of a repo.
101
102 The cache is serialized on disk in the following format:
103
104 <tip hex node> <tip rev number> [optional filtered repo hex hash]
105 <branch head hex node> <branch name>
106 <branch head hex node> <branch name>
107 ...
108
109 The first line is used to check if the cache is still valid. If the
110 branch cache is for a filtered repo view, an optional third hash is
111 included that hashes the hashes of all filtered revisions.
112 """
98 113
99 114 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
100 115 filteredhash=None):
101 116 super(branchcache, self).__init__(entries)
102 117 self.tipnode = tipnode
103 118 self.tiprev = tiprev
104 119 self.filteredhash = filteredhash
105 120
106 121 def _hashfiltered(self, repo):
107 122 """build hash of revision filtered in the current cache
108 123
109 124 Tracking tipnode and tiprev is not enough to ensure validity of the
110 125 cache as they do not help to distinct cache that ignored various
111 126 revision bellow tiprev.
112 127
113 128 To detect such difference, we build a cache of all ignored revisions.
114 129 """
115 130 cl = repo.changelog
116 131 if not cl.filteredrevs:
117 132 return None
118 133 key = None
119 134 revs = sorted(r for r in cl.filteredrevs if r <= self.tiprev)
120 135 if revs:
121 136 s = util.sha1()
122 137 for rev in revs:
123 138 s.update('%s;' % rev)
124 139 key = s.digest()
125 140 return key
126 141
127 142 def validfor(self, repo):
128 143 """Is the cache content valid regarding a repo
129 144
130 145 - False when cached tipnode is unknown or if we detect a strip.
131 146 - True when cache is up to date or a subset of current repo."""
132 147 try:
133 148 return ((self.tipnode == repo.changelog.node(self.tiprev))
134 149 and (self.filteredhash == self._hashfiltered(repo)))
135 150 except IndexError:
136 151 return False
137 152
138 153 def copy(self):
139 154 """return an deep copy of the branchcache object"""
140 155 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash)
141 156
142 157 def write(self, repo):
143 158 try:
144 159 f = repo.opener(_filename(repo), "w", atomictemp=True)
145 160 cachekey = [hex(self.tipnode), str(self.tiprev)]
146 161 if self.filteredhash is not None:
147 162 cachekey.append(hex(self.filteredhash))
148 163 f.write(" ".join(cachekey) + '\n')
149 164 for label, nodes in sorted(self.iteritems()):
150 165 for node in nodes:
151 166 f.write("%s %s\n" % (hex(node), encoding.fromlocal(label)))
152 167 f.close()
153 168 except (IOError, OSError, util.Abort):
154 169 # Abort may be raise by read only opener
155 170 pass
156 171
157 172 def update(self, repo, revgen):
158 173 """Given a branchhead cache, self, that may have extra nodes or be
159 174 missing heads, and a generator of nodes that are at least a superset of
160 175 heads missing, this function updates self to be correct.
161 176 """
162 177 cl = repo.changelog
163 178 # collect new branch entries
164 179 newbranches = {}
165 180 getbranch = cl.branch
166 181 for r in revgen:
167 182 newbranches.setdefault(getbranch(r), []).append(cl.node(r))
168 183 # if older branchheads are reachable from new ones, they aren't
169 184 # really branchheads. Note checking parents is insufficient:
170 185 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
171 186 for branch, newnodes in newbranches.iteritems():
172 187 bheads = self.setdefault(branch, [])
173 188 # Remove candidate heads that no longer are in the repo (e.g., as
174 189 # the result of a strip that just happened). Avoid using 'node in
175 190 # self' here because that dives down into branchcache code somewhat
176 191 # recursively.
177 192 bheadrevs = [cl.rev(node) for node in bheads
178 193 if cl.hasnode(node)]
179 194 newheadrevs = [cl.rev(node) for node in newnodes
180 195 if cl.hasnode(node)]
181 196 ctxisnew = bheadrevs and min(newheadrevs) > max(bheadrevs)
182 197 # Remove duplicates - nodes that are in newheadrevs and are already
183 198 # in bheadrevs. This can happen if you strip a node whose parent
184 199 # was already a head (because they're on different branches).
185 200 bheadrevs = sorted(set(bheadrevs).union(newheadrevs))
186 201
187 202 # Starting from tip means fewer passes over reachable. If we know
188 203 # the new candidates are not ancestors of existing heads, we don't
189 204 # have to examine ancestors of existing heads
190 205 if ctxisnew:
191 206 iterrevs = sorted(newheadrevs)
192 207 else:
193 208 iterrevs = list(bheadrevs)
194 209
195 210 # This loop prunes out two kinds of heads - heads that are
196 211 # superseded by a head in newheadrevs, and newheadrevs that are not
197 212 # heads because an existing head is their descendant.
198 213 while iterrevs:
199 214 latest = iterrevs.pop()
200 215 if latest not in bheadrevs:
201 216 continue
202 217 ancestors = set(cl.ancestors([latest],
203 218 bheadrevs[0]))
204 219 if ancestors:
205 220 bheadrevs = [b for b in bheadrevs if b not in ancestors]
206 221 self[branch] = [cl.node(rev) for rev in bheadrevs]
207 222 tiprev = max(bheadrevs)
208 223 if tiprev > self.tiprev:
209 224 self.tipnode = cl.node(tiprev)
210 225 self.tiprev = tiprev
211 226
212 227 if not self.validfor(repo):
213 228 # cache key are not valid anymore
214 229 self.tipnode = nullid
215 230 self.tiprev = nullrev
216 231 for heads in self.values():
217 232 tiprev = max(cl.rev(node) for node in heads)
218 233 if tiprev > self.tiprev:
219 234 self.tipnode = cl.node(tiprev)
220 235 self.tiprev = tiprev
221 236 self.filteredhash = self._hashfiltered(repo)
General Comments 0
You need to be logged in to leave comments. Login now