merge with crew
Matt Mackall
r18990:7373be70 merge default
--- a/mercurial/ancestor.py
+++ b/mercurial/ancestor.py
@@ -1,264 +1,386 @@
1 1 # ancestor.py - generic DAG ancestor algorithm for mercurial
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import heapq, util
9 9 from node import nullrev
10 10
11 def ancestor(a, b, pfunc):
11 def ancestors(pfunc, *orignodes):
12 """
13     Returns the common ancestors of the given nodes that are furthest
14     from a root (as measured by longest path).
15
16 pfunc must return a list of parent vertices for a given vertex.
17 """
18 if not isinstance(orignodes, set):
19 orignodes = set(orignodes)
20 if nullrev in orignodes:
21 return set()
22 if len(orignodes) <= 1:
23 return orignodes
24
25 def candidates(nodes):
26 allseen = (1 << len(nodes)) - 1
27 seen = [0] * (max(nodes) + 1)
28 for i, n in enumerate(nodes):
29 seen[n] = 1 << i
30 poison = 1 << (i + 1)
31
32 gca = set()
33 interesting = left = len(nodes)
34 nv = len(seen) - 1
35 while nv >= 0 and interesting:
36 v = nv
37 nv -= 1
38 if not seen[v]:
39 continue
40 sv = seen[v]
41 if sv < poison:
42 interesting -= 1
43 if sv == allseen:
44 gca.add(v)
45 sv |= poison
46 if v in nodes:
47 left -= 1
48 if left <= 1:
49 # history is linear
50 return set([v])
51 if sv < poison:
52 for p in pfunc(v):
53 sp = seen[p]
54 if p == nullrev:
55 continue
56 if sp == 0:
57 seen[p] = sv
58 interesting += 1
59 elif sp != sv:
60 seen[p] |= sv
61 else:
62 for p in pfunc(v):
63 if p == nullrev:
64 continue
65 sp = seen[p]
66 if sp and sp < poison:
67 interesting -= 1
68 seen[p] = sv
69 return gca
70
71 def deepest(nodes):
72 interesting = {}
73 count = max(nodes) + 1
74 depth = [0] * count
75 seen = [0] * count
76 mapping = []
77 for (i, n) in enumerate(sorted(nodes)):
78 depth[n] = 1
79 b = 1 << i
80 seen[n] = b
81 interesting[b] = 1
82 mapping.append((b, n))
83 nv = count - 1
84 while nv >= 0 and len(interesting) > 1:
85 v = nv
86 nv -= 1
87 dv = depth[v]
88 if dv == 0:
89 continue
90 sv = seen[v]
91 for p in pfunc(v):
92 if p == nullrev:
93 continue
94 dp = depth[p]
95 nsp = sp = seen[p]
96 if dp <= dv:
97 depth[p] = dv + 1
98 if sp != sv:
99 interesting[sv] += 1
100 nsp = seen[p] = sv
101 if sp:
102 interesting[sp] -= 1
103 if interesting[sp] == 0:
104 del interesting[sp]
105 elif dv == dp - 1:
106 nsp = sp | sv
107 if nsp == sp:
108 continue
109 seen[p] = nsp
110 interesting.setdefault(nsp, 0)
111 interesting[nsp] += 1
112 interesting[sp] -= 1
113 if interesting[sp] == 0:
114 del interesting[sp]
115 interesting[sv] -= 1
116 if interesting[sv] == 0:
117 del interesting[sv]
118
119 if len(interesting) != 1:
120 return []
121
122 k = 0
123 for i in interesting:
124 k |= i
125 return set(n for (i, n) in mapping if k & i)
126
127 gca = candidates(orignodes)
128
129 if len(gca) <= 1:
130 return gca
131 return deepest(gca)
132
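The new `ancestors` entry point finds the set of greatest common ancestors in two passes: `candidates` sweeps the revision numbers once, propagating per-node bit sets (plus a poison bit that rules out ancestors of an already-confirmed answer), and `deepest` then keeps only the candidates furthest from the root. A minimal sketch of calling it, assuming the function above is in scope; the toy DAG and `pfunc` are made up for illustration:

```python
# Toy DAG: rev -> list of parent revs; -1 is nullrev and is skipped.
parents = {0: [-1], 1: [0], 2: [0], 3: [1, 2], 4: [3], 5: [2]}
pfunc = parents.__getitem__

print(ancestors(pfunc, 4, 5))   # -> {2}: the deepest common ancestor
print(ancestors(pfunc, 3))      # -> {3}: a lone node is returned as-is
print(ancestors(pfunc, 1, 2))   # -> {0}
```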
133 def genericancestor(a, b, pfunc):
12 134 """
13 135 Returns the common ancestor of a and b that is furthest from a
14 136 root (as measured by longest path) or None if no ancestor is
15 137 found. If there are multiple common ancestors at the same
16 138 distance, the first one found is returned.
17 139
18 140 pfunc must return a list of parent vertices for a given vertex
19 141 """
20 142
21 143 if a == b:
22 144 return a
23 145
24 146 a, b = sorted([a, b])
25 147
26 148 # find depth from root of all ancestors
27 149 # depth is stored as a negative for heapq
28 150 parentcache = {}
29 151 visit = [a, b]
30 152 depth = {}
31 153 while visit:
32 154 vertex = visit[-1]
33 pl = pfunc(vertex)
155 pl = [p for p in pfunc(vertex) if p != nullrev]
34 156 parentcache[vertex] = pl
35 157 if not pl:
36 158 depth[vertex] = 0
37 159 visit.pop()
38 160 else:
39 161 for p in pl:
40 162 if p == a or p == b: # did we find a or b as a parent?
41 163 return p # we're done
42 164 if p not in depth:
43 165 visit.append(p)
44 166 if visit[-1] == vertex:
45 167 # -(maximum distance of parents + 1)
46 168 depth[vertex] = min([depth[p] for p in pl]) - 1
47 169 visit.pop()
48 170
49 171 # traverse ancestors in order of decreasing distance from root
50 172 def ancestors(vertex):
51 173 h = [(depth[vertex], vertex)]
52 174 seen = set()
53 175 while h:
54 176 d, n = heapq.heappop(h)
55 177 if n not in seen:
56 178 seen.add(n)
57 179 yield (d, n)
58 180 for p in parentcache[n]:
59 181 heapq.heappush(h, (depth[p], p))
60 182
61 183 def generations(vertex):
62 184 sg, s = None, set()
63 185 for g, v in ancestors(vertex):
64 186 if g != sg:
65 187 if sg:
66 188 yield sg, s
67 189 sg, s = g, set((v,))
68 190 else:
69 191 s.add(v)
70 192 yield sg, s
71 193
72 194 x = generations(a)
73 195 y = generations(b)
74 196 gx = x.next()
75 197 gy = y.next()
76 198
77 199 # increment each ancestor list until it is closer to root than
78 200 # the other, or they match
79 201 try:
80 202 while True:
81 203 if gx[0] == gy[0]:
82 204 for v in gx[1]:
83 205 if v in gy[1]:
84 206 return v
85 207 gy = y.next()
86 208 gx = x.next()
87 209 elif gx[0] > gy[0]:
88 210 gy = y.next()
89 211 else:
90 212 gx = x.next()
91 213 except StopIteration:
92 214 return None
93 215
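`genericancestor` keeps the older two-node interface: it returns one best common ancestor (or None) instead of the full set. Continuing the toy DAG from the sketch above:

```python
print(genericancestor(4, 5, pfunc))  # -> 2
print(genericancestor(3, 4, pfunc))  # -> 3: one argument is an ancestor of the other
print(genericancestor(1, 2, pfunc))  # -> 0
```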
94 216 def missingancestors(revs, bases, pfunc):
95 217 """Return all the ancestors of revs that are not ancestors of bases.
96 218
97 219 This may include elements from revs.
98 220
99 221 Equivalent to the revset (::revs - ::bases). Revs are returned in
100 222 revision number order, which is a topological order.
101 223
102 224 revs and bases should both be iterables. pfunc must return a list of
103 225     parent revs for a given rev.
104 226 """
105 227
106 228 revsvisit = set(revs)
107 229 basesvisit = set(bases)
108 230 if not revsvisit:
109 231 return []
110 232 if not basesvisit:
111 233 basesvisit.add(nullrev)
112 234 start = max(max(revsvisit), max(basesvisit))
113 235 bothvisit = revsvisit.intersection(basesvisit)
114 236 revsvisit.difference_update(bothvisit)
115 237 basesvisit.difference_update(bothvisit)
116 238 # At this point, we hold the invariants that:
117 239 # - revsvisit is the set of nodes we know are an ancestor of at least one
118 240 # of the nodes in revs
119 241 # - basesvisit is the same for bases
120 242 # - bothvisit is the set of nodes we know are ancestors of at least one of
121 243 # the nodes in revs and one of the nodes in bases
122 244 # - a node may be in none or one, but not more, of revsvisit, basesvisit
123 245 # and bothvisit at any given time
124 246 # Now we walk down in reverse topo order, adding parents of nodes already
125 247 # visited to the sets while maintaining the invariants. When a node is
126 248 # found in both revsvisit and basesvisit, it is removed from them and
127 249 # added to bothvisit instead. When revsvisit becomes empty, there are no
128 250 # more ancestors of revs that aren't also ancestors of bases, so exit.
129 251
130 252 missing = []
131 253 for curr in xrange(start, nullrev, -1):
132 254 if not revsvisit:
133 255 break
134 256
135 257 if curr in bothvisit:
136 258 bothvisit.remove(curr)
137 259 # curr's parents might have made it into revsvisit or basesvisit
138 260 # through another path
139 261 for p in pfunc(curr):
140 262 revsvisit.discard(p)
141 263 basesvisit.discard(p)
142 264 bothvisit.add(p)
143 265 continue
144 266
145 267 # curr will never be in both revsvisit and basesvisit, since if it
146 268 # were it'd have been pushed to bothvisit
147 269 if curr in revsvisit:
148 270 missing.append(curr)
149 271 thisvisit = revsvisit
150 272 othervisit = basesvisit
151 273 elif curr in basesvisit:
152 274 thisvisit = basesvisit
153 275 othervisit = revsvisit
154 276 else:
155 277 # not an ancestor of revs or bases: ignore
156 278 continue
157 279
158 280 thisvisit.remove(curr)
159 281 for p in pfunc(curr):
160 282 if p == nullrev:
161 283 pass
162 284 elif p in othervisit or p in bothvisit:
163 285 # p is implicitly in thisvisit. This means p is or should be
164 286 # in bothvisit
165 287 revsvisit.discard(p)
166 288 basesvisit.discard(p)
167 289 bothvisit.add(p)
168 290 else:
169 291 # visit later
170 292 thisvisit.add(p)
171 293
172 294 missing.reverse()
173 295 return missing
174 296
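`missingancestors` answers the revset `::revs - ::bases` with a single reverse-topological sweep, so it never walks below the point where the two sides have merged. A quick sketch on the same toy DAG, assuming the function above is in scope:

```python
print(missingancestors([4], [5], pfunc))  # -> [1, 3, 4]
print(missingancestors([5], [4], pfunc))  # -> [5]
print(missingancestors([3], [], pfunc))   # -> [0, 1, 2, 3]: all of ::3
```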
175 297 class lazyancestors(object):
176 298 def __init__(self, cl, revs, stoprev=0, inclusive=False):
177 299 """Create a new object generating ancestors for the given revs. Does
178 300 not generate revs lower than stoprev.
179 301
180 302 This is computed lazily starting from revs. The object supports
181 303 iteration and membership.
182 304
183 305         cl should be a changelog and revs should be an iterable. inclusive
184 306         is a boolean that indicates whether the revs themselves should be
185 307         included in the results.
186 308
187 309 Result does not include the null revision."""
188 310 self._parentrevs = cl.parentrevs
189 311 self._initrevs = revs
190 312 self._stoprev = stoprev
191 313 self._inclusive = inclusive
192 314
193 315 # Initialize data structures for __contains__.
194 316 # For __contains__, we use a heap rather than a deque because
195 317 # (a) it minimizes the number of parentrevs calls made
196 318 # (b) it makes the loop termination condition obvious
197 319 # Python's heap is a min-heap. Multiply all values by -1 to convert it
198 320 # into a max-heap.
199 321 self._containsvisit = [-rev for rev in revs]
200 322 heapq.heapify(self._containsvisit)
201 323 if inclusive:
202 324 self._containsseen = set(revs)
203 325 else:
204 326 self._containsseen = set()
205 327
206 328 def __iter__(self):
207 329 """Generate the ancestors of _initrevs in reverse topological order.
208 330
209 331 If inclusive is False, yield a sequence of revision numbers starting
210 332 with the parents of each revision in revs, i.e., each revision is *not*
211 333 considered an ancestor of itself. Results are in breadth-first order:
212 334 parents of each rev in revs, then parents of those, etc.
213 335
214 336 If inclusive is True, yield all the revs first (ignoring stoprev),
215 337 then yield all the ancestors of revs as when inclusive is False.
216 338 If an element in revs is an ancestor of a different rev it is not
217 339 yielded again."""
218 340 seen = set()
219 341 revs = self._initrevs
220 342 if self._inclusive:
221 343 for rev in revs:
222 344 yield rev
223 345 seen.update(revs)
224 346
225 347 parentrevs = self._parentrevs
226 348 stoprev = self._stoprev
227 349 visit = util.deque(revs)
228 350
229 351 while visit:
230 352 for parent in parentrevs(visit.popleft()):
231 353 if parent >= stoprev and parent not in seen:
232 354 visit.append(parent)
233 355 seen.add(parent)
234 356 yield parent
235 357
236 358 def __contains__(self, target):
237 359 """Test whether target is an ancestor of self._initrevs."""
238 360 # Trying to do both __iter__ and __contains__ using the same visit
239 361 # heap and seen set is complex enough that it slows down both. Keep
240 362 # them separate.
241 363 seen = self._containsseen
242 364 if target in seen:
243 365 return True
244 366
245 367 parentrevs = self._parentrevs
246 368 visit = self._containsvisit
247 369 stoprev = self._stoprev
248 370 heappop = heapq.heappop
249 371 heappush = heapq.heappush
250 372
251 373 targetseen = False
252 374
253 375 while visit and -visit[0] > target and not targetseen:
254 376 for parent in parentrevs(-heappop(visit)):
255 377 if parent < stoprev or parent in seen:
256 378 continue
257 379 # We need to make sure we push all parents into the heap so
258 380 # that we leave it in a consistent state for future calls.
259 381 heappush(visit, -parent)
260 382 seen.add(parent)
261 383 if parent == target:
262 384 targetseen = True
263 385
264 386 return targetseen
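`lazyancestors` materializes nothing up front: iteration does a breadth-first walk with a deque, while membership keeps a separate max-heap that only descends far enough to decide each query. A self-contained sketch, assuming the class above is in scope (Mercurial's `util.deque` is `collections.deque` here); the stub changelog is made up:

```python
class fakechangelog(object):
    """Just enough changelog for lazyancestors: a parentrevs() method."""
    def __init__(self, parentmap):
        self._parentmap = parentmap
    def parentrevs(self, rev):
        # (p1, p2) pairs, with -1 (nullrev) for a missing parent
        return self._parentmap[rev]

cl = fakechangelog({0: (-1, -1), 1: (0, -1), 2: (0, -1),
                    3: (1, 2), 4: (3, -1), 5: (2, -1)})
la = lazyancestors(cl, [4])
print(list(la))  # -> [3, 1, 2, 0]: parents first, then grandparents, ...
print(2 in la)   # -> True; the heap walk stops once it has passed 2
print(5 in la)   # -> False; nothing left in the heap is above 5
```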
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -1,1371 +1,1371 @@
1 1 # context.py - changeset and file context objects for mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import nullid, nullrev, short, hex, bin
9 9 from i18n import _
10 10 import ancestor, mdiff, error, util, scmutil, subrepo, patch, encoding, phases
11 11 import copies
12 12 import match as matchmod
13 13 import os, errno, stat
14 14 import obsolete as obsmod
15 15 import repoview
16 16
17 17 propertycache = util.propertycache
18 18
19 19 class changectx(object):
20 20 """A changecontext object makes access to data related to a particular
21 21 changeset convenient."""
22 22 def __init__(self, repo, changeid=''):
23 23 """changeid is a revision number, node, or tag"""
24 24 if changeid == '':
25 25 changeid = '.'
26 26 self._repo = repo
27 27
28 28 if isinstance(changeid, int):
29 29 try:
30 30 self._node = repo.changelog.node(changeid)
31 31 except IndexError:
32 32 raise error.RepoLookupError(
33 33 _("unknown revision '%s'") % changeid)
34 34 self._rev = changeid
35 35 return
36 36 if isinstance(changeid, long):
37 37 changeid = str(changeid)
38 38 if changeid == '.':
39 39 self._node = repo.dirstate.p1()
40 40 self._rev = repo.changelog.rev(self._node)
41 41 return
42 42 if changeid == 'null':
43 43 self._node = nullid
44 44 self._rev = nullrev
45 45 return
46 46 if changeid == 'tip':
47 47 self._node = repo.changelog.tip()
48 48 self._rev = repo.changelog.rev(self._node)
49 49 return
50 50 if len(changeid) == 20:
51 51 try:
52 52 self._node = changeid
53 53 self._rev = repo.changelog.rev(changeid)
54 54 return
55 55 except LookupError:
56 56 pass
57 57
58 58 try:
59 59 r = int(changeid)
60 60 if str(r) != changeid:
61 61 raise ValueError
62 62 l = len(repo.changelog)
63 63 if r < 0:
64 64 r += l
65 65 if r < 0 or r >= l:
66 66 raise ValueError
67 67 self._rev = r
68 68 self._node = repo.changelog.node(r)
69 69 return
70 70 except (ValueError, OverflowError, IndexError):
71 71 pass
72 72
73 73 if len(changeid) == 40:
74 74 try:
75 75 self._node = bin(changeid)
76 76 self._rev = repo.changelog.rev(self._node)
77 77 return
78 78 except (TypeError, LookupError):
79 79 pass
80 80
81 81 if changeid in repo._bookmarks:
82 82 self._node = repo._bookmarks[changeid]
83 83 self._rev = repo.changelog.rev(self._node)
84 84 return
85 85 if changeid in repo._tagscache.tags:
86 86 self._node = repo._tagscache.tags[changeid]
87 87 self._rev = repo.changelog.rev(self._node)
88 88 return
89 89 try:
90 90 self._node = repo.branchtip(changeid)
91 91 self._rev = repo.changelog.rev(self._node)
92 92 return
93 93 except error.RepoLookupError:
94 94 pass
95 95
96 96 self._node = repo.changelog._partialmatch(changeid)
97 97 if self._node is not None:
98 98 self._rev = repo.changelog.rev(self._node)
99 99 return
100 100
101 101 # lookup failed
102 102 # check if it might have come from damaged dirstate
103 103 #
104 104 # XXX we could avoid the unfiltered if we had a recognizable exception
105 105 # for filtered changeset access
106 106 if changeid in repo.unfiltered().dirstate.parents():
107 107 raise error.Abort(_("working directory has unknown parent '%s'!")
108 108 % short(changeid))
109 109 try:
110 110 if len(changeid) == 20:
111 111 changeid = hex(changeid)
112 112 except TypeError:
113 113 pass
114 114 raise error.RepoLookupError(
115 115 _("unknown revision '%s'") % changeid)
116 116
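The constructor tries each lookup form in turn: integer revision, the symbolic names '.', 'null' and 'tip', a 20-byte binary node, a decimal string, a 40-character hex string, then bookmark, tag and branch names, and finally a unique hex prefix. A sketch of what that buys callers, assuming an existing `repo` object (`repo[x]` constructs a changectx); the hash and names are hypothetical:

```python
ctx = repo[0]                # local revision number
ctx = repo['.']              # working directory parent
ctx = repo['tip']            # tip of the changelog
ctx = repo['null']           # the null revision
ctx = repo['1f0dee641bb7']   # unambiguous hex prefix (hypothetical hash)
ctx = repo['stable']         # bookmark, tag or branch, tried in that order
```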
117 117 def __str__(self):
118 118 return short(self.node())
119 119
120 120 def __int__(self):
121 121 return self.rev()
122 122
123 123 def __repr__(self):
124 124 return "<changectx %s>" % str(self)
125 125
126 126 def __hash__(self):
127 127 try:
128 128 return hash(self._rev)
129 129 except AttributeError:
130 130 return id(self)
131 131
132 132 def __eq__(self, other):
133 133 try:
134 134 return self._rev == other._rev
135 135 except AttributeError:
136 136 return False
137 137
138 138 def __ne__(self, other):
139 139 return not (self == other)
140 140
141 141 def __nonzero__(self):
142 142 return self._rev != nullrev
143 143
144 144 @propertycache
145 145 def _changeset(self):
146 146 return self._repo.changelog.read(self.rev())
147 147
148 148 @propertycache
149 149 def _manifest(self):
150 150 return self._repo.manifest.read(self._changeset[0])
151 151
152 152 @propertycache
153 153 def _manifestdelta(self):
154 154 return self._repo.manifest.readdelta(self._changeset[0])
155 155
156 156 @propertycache
157 157 def _parents(self):
158 158 p = self._repo.changelog.parentrevs(self._rev)
159 159 if p[1] == nullrev:
160 160 p = p[:-1]
161 161 return [changectx(self._repo, x) for x in p]
162 162
163 163 @propertycache
164 164 def substate(self):
165 165 return subrepo.state(self, self._repo.ui)
166 166
167 167 def __contains__(self, key):
168 168 return key in self._manifest
169 169
170 170 def __getitem__(self, key):
171 171 return self.filectx(key)
172 172
173 173 def __iter__(self):
174 174 for f in sorted(self._manifest):
175 175 yield f
176 176
177 177 def changeset(self):
178 178 return self._changeset
179 179 def manifest(self):
180 180 return self._manifest
181 181 def manifestnode(self):
182 182 return self._changeset[0]
183 183
184 184 def rev(self):
185 185 return self._rev
186 186 def node(self):
187 187 return self._node
188 188 def hex(self):
189 189 return hex(self._node)
190 190 def user(self):
191 191 return self._changeset[1]
192 192 def date(self):
193 193 return self._changeset[2]
194 194 def files(self):
195 195 return self._changeset[3]
196 196 def description(self):
197 197 return self._changeset[4]
198 198 def branch(self):
199 199 return encoding.tolocal(self._changeset[5].get("branch"))
200 200 def closesbranch(self):
201 201 return 'close' in self._changeset[5]
202 202 def extra(self):
203 203 return self._changeset[5]
204 204 def tags(self):
205 205 return self._repo.nodetags(self._node)
206 206 def bookmarks(self):
207 207 return self._repo.nodebookmarks(self._node)
208 208 def phase(self):
209 209 return self._repo._phasecache.phase(self._repo, self._rev)
210 210 def phasestr(self):
211 211 return phases.phasenames[self.phase()]
212 212 def mutable(self):
213 213 return self.phase() > phases.public
214 214 def hidden(self):
215 215 return self._rev in repoview.filterrevs(self._repo, 'visible')
216 216
217 217 def parents(self):
218 218 """return contexts for each parent changeset"""
219 219 return self._parents
220 220
221 221 def p1(self):
222 222 return self._parents[0]
223 223
224 224 def p2(self):
225 225 if len(self._parents) == 2:
226 226 return self._parents[1]
227 227 return changectx(self._repo, -1)
228 228
229 229 def children(self):
230 230 """return contexts for each child changeset"""
231 231 c = self._repo.changelog.children(self._node)
232 232 return [changectx(self._repo, x) for x in c]
233 233
234 234 def ancestors(self):
235 235 for a in self._repo.changelog.ancestors([self._rev]):
236 236 yield changectx(self._repo, a)
237 237
238 238 def descendants(self):
239 239 for d in self._repo.changelog.descendants([self._rev]):
240 240 yield changectx(self._repo, d)
241 241
242 242 def obsolete(self):
243 243 """True if the changeset is obsolete"""
244 244 return self.rev() in obsmod.getrevs(self._repo, 'obsolete')
245 245
246 246 def extinct(self):
247 247 """True if the changeset is extinct"""
248 248 return self.rev() in obsmod.getrevs(self._repo, 'extinct')
249 249
250 250 def unstable(self):
251 251         """True if the changeset is not obsolete but its ancestors are"""
252 252 return self.rev() in obsmod.getrevs(self._repo, 'unstable')
253 253
254 254 def bumped(self):
255 255         """True if the changeset tries to be a successor of a public changeset
256 256
257 257 Only non-public and non-obsolete changesets may be bumped.
258 258 """
259 259 return self.rev() in obsmod.getrevs(self._repo, 'bumped')
260 260
261 261 def divergent(self):
262 262         """Is a successor of a changeset with multiple possible successor sets
263 263
264 264 Only non-public and non-obsolete changesets may be divergent.
265 265 """
266 266 return self.rev() in obsmod.getrevs(self._repo, 'divergent')
267 267
268 268 def troubled(self):
269 269 """True if the changeset is either unstable, bumped or divergent"""
270 270 return self.unstable() or self.bumped() or self.divergent()
271 271
272 272 def troubles(self):
273 273         """return the list of troubles affecting this changeset.
274 274
275 275         Troubles are returned as strings. Possible values are:
276 276 - unstable,
277 277 - bumped,
278 278 - divergent.
279 279 """
280 280 troubles = []
281 281 if self.unstable():
282 282 troubles.append('unstable')
283 283 if self.bumped():
284 284 troubles.append('bumped')
285 285 if self.divergent():
286 286 troubles.append('divergent')
287 287 return troubles
288 288
289 289 def _fileinfo(self, path):
290 290 if '_manifest' in self.__dict__:
291 291 try:
292 292 return self._manifest[path], self._manifest.flags(path)
293 293 except KeyError:
294 294 raise error.ManifestLookupError(self._node, path,
295 295 _('not found in manifest'))
296 296 if '_manifestdelta' in self.__dict__ or path in self.files():
297 297 if path in self._manifestdelta:
298 298 return (self._manifestdelta[path],
299 299 self._manifestdelta.flags(path))
300 300 node, flag = self._repo.manifest.find(self._changeset[0], path)
301 301 if not node:
302 302 raise error.ManifestLookupError(self._node, path,
303 303 _('not found in manifest'))
304 304
305 305 return node, flag
306 306
307 307 def filenode(self, path):
308 308 return self._fileinfo(path)[0]
309 309
310 310 def flags(self, path):
311 311 try:
312 312 return self._fileinfo(path)[1]
313 313 except error.LookupError:
314 314 return ''
315 315
316 316 def filectx(self, path, fileid=None, filelog=None):
317 317 """get a file context from this changeset"""
318 318 if fileid is None:
319 319 fileid = self.filenode(path)
320 320 return filectx(self._repo, path, fileid=fileid,
321 321 changectx=self, filelog=filelog)
322 322
323 323 def ancestor(self, c2):
324 324 """
325 325 return the ancestor context of self and c2
326 326 """
327 327 # deal with workingctxs
328 328 n2 = c2._node
329 329 if n2 is None:
330 330 n2 = c2._parents[0]._node
331 331 n = self._repo.changelog.ancestor(self._node, n2)
332 332 return changectx(self._repo, n)
333 333
334 334 def descendant(self, other):
335 335 """True if other is descendant of this changeset"""
336 336 return self._repo.changelog.descendant(self._rev, other._rev)
337 337
338 338 def walk(self, match):
339 339 fset = set(match.files())
340 340 # for dirstate.walk, files=['.'] means "walk the whole tree".
341 341 # follow that here, too
342 342 fset.discard('.')
343 343 for fn in self:
344 344 if fn in fset:
345 345 # specified pattern is the exact name
346 346 fset.remove(fn)
347 347 if match(fn):
348 348 yield fn
349 349 for fn in sorted(fset):
350 350 if fn in self._dirs:
351 351 # specified pattern is a directory
352 352 continue
353 353 if match.bad(fn, _('no such file in rev %s') % self) and match(fn):
354 354 yield fn
355 355
356 356 def sub(self, path):
357 357 return subrepo.subrepo(self, path)
358 358
359 359 def match(self, pats=[], include=None, exclude=None, default='glob'):
360 360 r = self._repo
361 361 return matchmod.match(r.root, r.getcwd(), pats,
362 362 include, exclude, default,
363 363 auditor=r.auditor, ctx=self)
364 364
365 365 def diff(self, ctx2=None, match=None, **opts):
366 366 """Returns a diff generator for the given contexts and matcher"""
367 367 if ctx2 is None:
368 368 ctx2 = self.p1()
369 369 if ctx2 is not None and not isinstance(ctx2, changectx):
370 370 ctx2 = self._repo[ctx2]
371 371 diffopts = patch.diffopts(self._repo.ui, opts)
372 372 return patch.diff(self._repo, ctx2.node(), self.node(),
373 373 match=match, opts=diffopts)
374 374
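Since `diff` defaults `ctx2` to the first parent, `ctx.diff()` yields the changes the changeset itself introduced. A hedged usage sketch, assuming an existing `repo`:

```python
import sys
for chunk in repo['tip'].diff():  # tip against its first parent
    sys.stdout.write(chunk)
```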
375 375 @propertycache
376 376 def _dirs(self):
377 377 return scmutil.dirs(self._manifest)
378 378
379 379 def dirs(self):
380 380 return self._dirs
381 381
382 382 def dirty(self):
383 383 return False
384 384
385 385 class filectx(object):
386 386 """A filecontext object makes access to data related to a particular
387 387 filerevision convenient."""
388 388 def __init__(self, repo, path, changeid=None, fileid=None,
389 389 filelog=None, changectx=None):
390 390 """changeid can be a changeset revision, node, or tag.
391 391 fileid can be a file revision or node."""
392 392 self._repo = repo
393 393 self._path = path
394 394
395 395 assert (changeid is not None
396 396 or fileid is not None
397 397 or changectx is not None), \
398 398 ("bad args: changeid=%r, fileid=%r, changectx=%r"
399 399 % (changeid, fileid, changectx))
400 400
401 401 if filelog:
402 402 self._filelog = filelog
403 403
404 404 if changeid is not None:
405 405 self._changeid = changeid
406 406 if changectx is not None:
407 407 self._changectx = changectx
408 408 if fileid is not None:
409 409 self._fileid = fileid
410 410
411 411 @propertycache
412 412 def _changectx(self):
413 413 try:
414 414 return changectx(self._repo, self._changeid)
415 415 except error.RepoLookupError:
416 416 # Linkrev may point to any revision in the repository. When the
417 417 # repository is filtered this may lead to `filectx` trying to build
418 418         # `changectx` for a filtered revision. In that case we fall back to
419 419         # creating `changectx` on the unfiltered version of the repository.
420 420         # This fallback should not be an issue because `changectx` objects
421 421         # obtained from `filectx` are not used in complex operations that
422 422         # care about filtering.
423 423         #
424 424         # This fallback is a cheap and dirty fix that prevents several
425 425         # crashes. It does not ensure the behavior is correct. However the
426 426         # behavior was not correct before filtering either, and "incorrect
427 427         # behavior" is better than a crash.
428 428         #
429 429         # Linkrevs have several serious problems with filtering that are
430 430         # complicated to solve. Proper handling of the issue here should be
431 431         # considered when fixing the linkrev issues is on the table.
432 432 return changectx(self._repo.unfiltered(), self._changeid)
433 433
434 434 @propertycache
435 435 def _filelog(self):
436 436 return self._repo.file(self._path)
437 437
438 438 @propertycache
439 439 def _changeid(self):
440 440 if '_changectx' in self.__dict__:
441 441 return self._changectx.rev()
442 442 else:
443 443 return self._filelog.linkrev(self._filerev)
444 444
445 445 @propertycache
446 446 def _filenode(self):
447 447 if '_fileid' in self.__dict__:
448 448 return self._filelog.lookup(self._fileid)
449 449 else:
450 450 return self._changectx.filenode(self._path)
451 451
452 452 @propertycache
453 453 def _filerev(self):
454 454 return self._filelog.rev(self._filenode)
455 455
456 456 @propertycache
457 457 def _repopath(self):
458 458 return self._path
459 459
460 460 def __nonzero__(self):
461 461 try:
462 462 self._filenode
463 463 return True
464 464 except error.LookupError:
465 465 # file is missing
466 466 return False
467 467
468 468 def __str__(self):
469 469 return "%s@%s" % (self.path(), short(self.node()))
470 470
471 471 def __repr__(self):
472 472 return "<filectx %s>" % str(self)
473 473
474 474 def __hash__(self):
475 475 try:
476 476 return hash((self._path, self._filenode))
477 477 except AttributeError:
478 478 return id(self)
479 479
480 480 def __eq__(self, other):
481 481 try:
482 482 return (self._path == other._path
483 483 and self._filenode == other._filenode)
484 484 except AttributeError:
485 485 return False
486 486
487 487 def __ne__(self, other):
488 488 return not (self == other)
489 489
490 490 def filectx(self, fileid):
491 491 '''opens an arbitrary revision of the file without
492 492 opening a new filelog'''
493 493 return filectx(self._repo, self._path, fileid=fileid,
494 494 filelog=self._filelog)
495 495
496 496 def filerev(self):
497 497 return self._filerev
498 498 def filenode(self):
499 499 return self._filenode
500 500 def flags(self):
501 501 return self._changectx.flags(self._path)
502 502 def filelog(self):
503 503 return self._filelog
504 504
505 505 def rev(self):
506 506 if '_changectx' in self.__dict__:
507 507 return self._changectx.rev()
508 508 if '_changeid' in self.__dict__:
509 509 return self._changectx.rev()
510 510 return self._filelog.linkrev(self._filerev)
511 511
512 512 def linkrev(self):
513 513 return self._filelog.linkrev(self._filerev)
514 514 def node(self):
515 515 return self._changectx.node()
516 516 def hex(self):
517 517 return hex(self.node())
518 518 def user(self):
519 519 return self._changectx.user()
520 520 def date(self):
521 521 return self._changectx.date()
522 522 def files(self):
523 523 return self._changectx.files()
524 524 def description(self):
525 525 return self._changectx.description()
526 526 def branch(self):
527 527 return self._changectx.branch()
528 528 def extra(self):
529 529 return self._changectx.extra()
530 530 def phase(self):
531 531 return self._changectx.phase()
532 532 def phasestr(self):
533 533 return self._changectx.phasestr()
534 534 def manifest(self):
535 535 return self._changectx.manifest()
536 536 def changectx(self):
537 537 return self._changectx
538 538
539 539 def data(self):
540 540 return self._filelog.read(self._filenode)
541 541 def path(self):
542 542 return self._path
543 543 def size(self):
544 544 return self._filelog.size(self._filerev)
545 545
546 546 def isbinary(self):
547 547 try:
548 548 return util.binary(self.data())
549 549 except IOError:
550 550 return False
551 551
552 552 def cmp(self, fctx):
553 553 """compare with other file context
554 554
555 555         returns True if different from fctx.
556 556 """
557 557 if (fctx._filerev is None
558 558 and (self._repo._encodefilterpats
559 559 # if file data starts with '\1\n', empty metadata block is
560 560 # prepended, which adds 4 bytes to filelog.size().
561 561 or self.size() - 4 == fctx.size())
562 562 or self.size() == fctx.size()):
563 563 return self._filelog.cmp(self._filenode, fctx.data())
564 564
565 565 return True
566 566
567 567 def renamed(self):
568 568 """check if file was actually renamed in this changeset revision
569 569
570 570         If a rename is logged in the file revision, we report the copy for
571 571         the changeset only if the file revision's linkrev points back to the
572 572         changeset in question, or both changeset parents contain different file revisions.
573 573 """
574 574
575 575 renamed = self._filelog.renamed(self._filenode)
576 576 if not renamed:
577 577 return renamed
578 578
579 579 if self.rev() == self.linkrev():
580 580 return renamed
581 581
582 582 name = self.path()
583 583 fnode = self._filenode
584 584 for p in self._changectx.parents():
585 585 try:
586 586 if fnode == p.filenode(name):
587 587 return None
588 588 except error.LookupError:
589 589 pass
590 590 return renamed
591 591
592 592 def parents(self):
593 593 p = self._path
594 594 fl = self._filelog
595 595 pl = [(p, n, fl) for n in self._filelog.parents(self._filenode)]
596 596
597 597 r = self._filelog.renamed(self._filenode)
598 598 if r:
599 599 pl[0] = (r[0], r[1], None)
600 600
601 601 return [filectx(self._repo, p, fileid=n, filelog=l)
602 602 for p, n, l in pl if n != nullid]
603 603
604 604 def p1(self):
605 605 return self.parents()[0]
606 606
607 607 def p2(self):
608 608 p = self.parents()
609 609 if len(p) == 2:
610 610 return p[1]
611 611 return filectx(self._repo, self._path, fileid=-1, filelog=self._filelog)
612 612
613 613 def children(self):
614 614 # hard for renames
615 615 c = self._filelog.children(self._filenode)
616 616 return [filectx(self._repo, self._path, fileid=x,
617 617 filelog=self._filelog) for x in c]
618 618
619 619 def annotate(self, follow=False, linenumber=None, diffopts=None):
620 620 '''returns a list of tuples of (ctx, line) for each line
621 621 in the file, where ctx is the filectx of the node where
622 622 that line was last changed.
623 623 This returns tuples of ((ctx, linenumber), line) for each line,
624 624 if "linenumber" parameter is NOT "None".
625 625         In such tuples, linenumber is the line's number at its first
626 626         appearance in the managed file.
627 627         To reduce annotation cost,
628 628         this returns a fixed value (False) as linenumber
629 629         if the "linenumber" parameter is "False".'''
630 630
631 631 def decorate_compat(text, rev):
632 632 return ([rev] * len(text.splitlines()), text)
633 633
634 634 def without_linenumber(text, rev):
635 635 return ([(rev, False)] * len(text.splitlines()), text)
636 636
637 637 def with_linenumber(text, rev):
638 638 size = len(text.splitlines())
639 639 return ([(rev, i) for i in xrange(1, size + 1)], text)
640 640
641 641 decorate = (((linenumber is None) and decorate_compat) or
642 642 (linenumber and with_linenumber) or
643 643 without_linenumber)
644 644
645 645 def pair(parent, child):
646 646 blocks = mdiff.allblocks(parent[1], child[1], opts=diffopts,
647 647 refine=True)
648 648 for (a1, a2, b1, b2), t in blocks:
649 649 # Changed blocks ('!') or blocks made only of blank lines ('~')
650 650 # belong to the child.
651 651 if t == '=':
652 652 child[0][b1:b2] = parent[0][a1:a2]
653 653 return child
654 654
655 655 getlog = util.lrucachefunc(lambda x: self._repo.file(x))
656 656 def getctx(path, fileid):
657 657 log = path == self._path and self._filelog or getlog(path)
658 658 return filectx(self._repo, path, fileid=fileid, filelog=log)
659 659 getctx = util.lrucachefunc(getctx)
660 660
661 661 def parents(f):
662 662 # we want to reuse filectx objects as much as possible
663 663 p = f._path
664 664 if f._filerev is None: # working dir
665 665 pl = [(n.path(), n.filerev()) for n in f.parents()]
666 666 else:
667 667 pl = [(p, n) for n in f._filelog.parentrevs(f._filerev)]
668 668
669 669 if follow:
670 670 r = f.renamed()
671 671 if r:
672 672 pl[0] = (r[0], getlog(r[0]).rev(r[1]))
673 673
674 674 return [getctx(p, n) for p, n in pl if n != nullrev]
675 675
676 676 # use linkrev to find the first changeset where self appeared
677 677 if self.rev() != self.linkrev():
678 678 base = self.filectx(self.filerev())
679 679 else:
680 680 base = self
681 681
682 682 # This algorithm would prefer to be recursive, but Python is a
683 683 # bit recursion-hostile. Instead we do an iterative
684 684 # depth-first search.
685 685
686 686 visit = [base]
687 687 hist = {}
688 688 pcache = {}
689 689 needed = {base: 1}
690 690 while visit:
691 691 f = visit[-1]
692 692 if f not in pcache:
693 693 pcache[f] = parents(f)
694 694
695 695 ready = True
696 696 pl = pcache[f]
697 697 for p in pl:
698 698 if p not in hist:
699 699 ready = False
700 700 visit.append(p)
701 701 needed[p] = needed.get(p, 0) + 1
702 702 if ready:
703 703 visit.pop()
704 704 curr = decorate(f.data(), f)
705 705 for p in pl:
706 706 curr = pair(hist[p], curr)
707 707 if needed[p] == 1:
708 708 del hist[p]
709 709 else:
710 710 needed[p] -= 1
711 711
712 712 hist[f] = curr
713 713 pcache[f] = []
714 714
715 715 return zip(hist[base][0], hist[base][1].splitlines(True))
716 716
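The heart of `annotate` is `pair`: blocks the diff reports as equal inherit the parent's annotations, and everything else stays attributed to the child. A self-contained sketch of that propagation step, with `difflib.SequenceMatcher` standing in for `mdiff.allblocks`:

```python
import difflib

def decorate(text, rev):
    # tag every line of text with rev, like annotate's decorators
    return ([rev] * len(text.splitlines()), text)

def pair(parent, child):
    a = parent[1].splitlines(True)
    b = child[1].splitlines(True)
    for tag, a1, a2, b1, b2 in difflib.SequenceMatcher(None, a, b).get_opcodes():
        if tag == 'equal':
            # unchanged block: these lines keep the parent's annotation
            child[0][b1:b2] = parent[0][a1:a2]
    return child

old = decorate("a\nb\nc\n", rev=1)
new = pair(old, decorate("a\nX\nc\n", rev=2))
print(new[0])  # -> [1, 2, 1]: only the changed middle line blames rev 2
```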
717 717 def ancestor(self, fc2, actx):
718 718 """
719 719 find the common ancestor file context, if any, of self, and fc2
720 720
721 721 actx must be the changectx of the common ancestor
722 722 of self's and fc2's respective changesets.
723 723 """
724 724
725 725 # the easy case: no (relevant) renames
726 726 if fc2.path() == self.path() and self.path() in actx:
727 727 return actx[self.path()]
728 728
729 729 # the next easiest cases: unambiguous predecessor (name trumps
730 730 # history)
731 731 if self.path() in actx and fc2.path() not in actx:
732 732 return actx[self.path()]
733 733 if fc2.path() in actx and self.path() not in actx:
734 734 return actx[fc2.path()]
735 735
736 736 # prime the ancestor cache for the working directory
737 737 acache = {}
738 738 for c in (self, fc2):
739 739 if c._filerev is None:
740 740 pl = [(n.path(), n.filenode()) for n in c.parents()]
741 741 acache[(c._path, None)] = pl
742 742
743 743 flcache = {self._repopath:self._filelog, fc2._repopath:fc2._filelog}
744 744 def parents(vertex):
745 745 if vertex in acache:
746 746 return acache[vertex]
747 747 f, n = vertex
748 748 if f not in flcache:
749 749 flcache[f] = self._repo.file(f)
750 750 fl = flcache[f]
751 751 pl = [(f, p) for p in fl.parents(n) if p != nullid]
752 752 re = fl.renamed(n)
753 753 if re:
754 754 pl.append(re)
755 755 acache[vertex] = pl
756 756 return pl
757 757
758 758 a, b = (self._path, self._filenode), (fc2._path, fc2._filenode)
759 v = ancestor.ancestor(a, b, parents)
759 v = ancestor.genericancestor(a, b, parents)
760 760 if v:
761 761 f, n = v
762 762 return filectx(self._repo, f, fileid=n, filelog=flcache[f])
763 763
764 764 return None
765 765
766 766 def ancestors(self, followfirst=False):
767 767 visit = {}
768 768 c = self
769 769 cut = followfirst and 1 or None
770 770 while True:
771 771 for parent in c.parents()[:cut]:
772 772 visit[(parent.rev(), parent.node())] = parent
773 773 if not visit:
774 774 break
775 775 c = visit.pop(max(visit))
776 776 yield c
777 777
778 778 def copies(self, c2):
779 779 if not util.safehasattr(self, "_copycache"):
780 780 self._copycache = {}
781 781 sc2 = str(c2)
782 782 if sc2 not in self._copycache:
783 783 self._copycache[sc2] = copies.pathcopies(c2)
784 784 return self._copycache[sc2]
785 785
786 786 class workingctx(changectx):
787 787 """A workingctx object makes access to data related to
788 788 the current working directory convenient.
789 789 date - any valid date string or (unixtime, offset), or None.
790 790 user - username string, or None.
791 791 extra - a dictionary of extra values, or None.
792 792 changes - a list of file lists as returned by localrepo.status()
793 793 or None to use the repository status.
794 794 """
795 795 def __init__(self, repo, text="", user=None, date=None, extra=None,
796 796 changes=None):
797 797 self._repo = repo
798 798 self._rev = None
799 799 self._node = None
800 800 self._text = text
801 801 if date:
802 802 self._date = util.parsedate(date)
803 803 if user:
804 804 self._user = user
805 805 if changes:
806 806 self._status = list(changes[:4])
807 807 self._unknown = changes[4]
808 808 self._ignored = changes[5]
809 809 self._clean = changes[6]
810 810 else:
811 811 self._unknown = None
812 812 self._ignored = None
813 813 self._clean = None
814 814
815 815 self._extra = {}
816 816 if extra:
817 817 self._extra = extra.copy()
818 818 if 'branch' not in self._extra:
819 819 try:
820 820 branch = encoding.fromlocal(self._repo.dirstate.branch())
821 821 except UnicodeDecodeError:
822 822 raise util.Abort(_('branch name not in UTF-8!'))
823 823 self._extra['branch'] = branch
824 824 if self._extra['branch'] == '':
825 825 self._extra['branch'] = 'default'
826 826
827 827 def __str__(self):
828 828 return str(self._parents[0]) + "+"
829 829
830 830 def __repr__(self):
831 831 return "<workingctx %s>" % str(self)
832 832
833 833 def __nonzero__(self):
834 834 return True
835 835
836 836 def __contains__(self, key):
837 837 return self._repo.dirstate[key] not in "?r"
838 838
839 839 def _buildflagfunc(self):
840 840 # Create a fallback function for getting file flags when the
841 841 # filesystem doesn't support them
842 842
843 843 copiesget = self._repo.dirstate.copies().get
844 844
845 845 if len(self._parents) < 2:
846 846 # when we have one parent, it's easy: copy from parent
847 847 man = self._parents[0].manifest()
848 848 def func(f):
849 849 f = copiesget(f, f)
850 850 return man.flags(f)
851 851 else:
852 852 # merges are tricky: we try to reconstruct the unstored
853 853 # result from the merge (issue1802)
854 854 p1, p2 = self._parents
855 855 pa = p1.ancestor(p2)
856 856 m1, m2, ma = p1.manifest(), p2.manifest(), pa.manifest()
857 857
858 858 def func(f):
859 859 f = copiesget(f, f) # may be wrong for merges with copies
860 860 fl1, fl2, fla = m1.flags(f), m2.flags(f), ma.flags(f)
861 861 if fl1 == fl2:
862 862 return fl1
863 863 if fl1 == fla:
864 864 return fl2
865 865 if fl2 == fla:
866 866 return fl1
867 867 return '' # punt for conflicts
868 868
869 869 return func
870 870
871 871 @propertycache
872 872 def _flagfunc(self):
873 873 return self._repo.dirstate.flagfunc(self._buildflagfunc)
874 874
875 875 @propertycache
876 876 def _manifest(self):
877 877 """generate a manifest corresponding to the working directory"""
878 878
879 879 man = self._parents[0].manifest().copy()
880 880 if len(self._parents) > 1:
881 881 man2 = self.p2().manifest()
882 882 def getman(f):
883 883 if f in man:
884 884 return man
885 885 return man2
886 886 else:
887 887 getman = lambda f: man
888 888
889 889 copied = self._repo.dirstate.copies()
890 890 ff = self._flagfunc
891 891 modified, added, removed, deleted = self._status
892 892 for i, l in (("a", added), ("m", modified)):
893 893 for f in l:
894 894 orig = copied.get(f, f)
895 895 man[f] = getman(orig).get(orig, nullid) + i
896 896 try:
897 897 man.set(f, ff(f))
898 898 except OSError:
899 899 pass
900 900
901 901 for f in deleted + removed:
902 902 if f in man:
903 903 del man[f]
904 904
905 905 return man
906 906
907 907 def __iter__(self):
908 908 d = self._repo.dirstate
909 909 for f in d:
910 910 if d[f] != 'r':
911 911 yield f
912 912
913 913 @propertycache
914 914 def _status(self):
915 915 return self._repo.status()[:4]
916 916
917 917 @propertycache
918 918 def _user(self):
919 919 return self._repo.ui.username()
920 920
921 921 @propertycache
922 922 def _date(self):
923 923 return util.makedate()
924 924
925 925 @propertycache
926 926 def _parents(self):
927 927 p = self._repo.dirstate.parents()
928 928 if p[1] == nullid:
929 929 p = p[:-1]
930 930 return [changectx(self._repo, x) for x in p]
931 931
932 932 def status(self, ignored=False, clean=False, unknown=False):
933 933 """Explicit status query
934 934 Unless this method is used to query the working copy status, the
935 935 _status property will implicitly read the status using its default
936 936 arguments."""
937 937 stat = self._repo.status(ignored=ignored, clean=clean, unknown=unknown)
938 938 self._unknown = self._ignored = self._clean = None
939 939 if unknown:
940 940 self._unknown = stat[4]
941 941 if ignored:
942 942 self._ignored = stat[5]
943 943 if clean:
944 944 self._clean = stat[6]
945 945 self._status = stat[:4]
946 946 return stat
947 947
948 948 def manifest(self):
949 949 return self._manifest
950 950 def user(self):
951 951 return self._user or self._repo.ui.username()
952 952 def date(self):
953 953 return self._date
954 954 def description(self):
955 955 return self._text
956 956 def files(self):
957 957 return sorted(self._status[0] + self._status[1] + self._status[2])
958 958
959 959 def modified(self):
960 960 return self._status[0]
961 961 def added(self):
962 962 return self._status[1]
963 963 def removed(self):
964 964 return self._status[2]
965 965 def deleted(self):
966 966 return self._status[3]
967 967 def unknown(self):
968 968 assert self._unknown is not None # must call status first
969 969 return self._unknown
970 970 def ignored(self):
971 971 assert self._ignored is not None # must call status first
972 972 return self._ignored
973 973 def clean(self):
974 974 assert self._clean is not None # must call status first
975 975 return self._clean
976 976 def branch(self):
977 977 return encoding.tolocal(self._extra['branch'])
978 978 def closesbranch(self):
979 979 return 'close' in self._extra
980 980 def extra(self):
981 981 return self._extra
982 982
983 983 def tags(self):
984 984 t = []
985 985 for p in self.parents():
986 986 t.extend(p.tags())
987 987 return t
988 988
989 989 def bookmarks(self):
990 990 b = []
991 991 for p in self.parents():
992 992 b.extend(p.bookmarks())
993 993 return b
994 994
995 995 def phase(self):
996 996 phase = phases.draft # default phase to draft
997 997 for p in self.parents():
998 998 phase = max(phase, p.phase())
999 999 return phase
1000 1000
1001 1001 def hidden(self):
1002 1002 return False
1003 1003
1004 1004 def children(self):
1005 1005 return []
1006 1006
1007 1007 def flags(self, path):
1008 1008 if '_manifest' in self.__dict__:
1009 1009 try:
1010 1010 return self._manifest.flags(path)
1011 1011 except KeyError:
1012 1012 return ''
1013 1013
1014 1014 try:
1015 1015 return self._flagfunc(path)
1016 1016 except OSError:
1017 1017 return ''
1018 1018
1019 1019 def filectx(self, path, filelog=None):
1020 1020 """get a file context from the working directory"""
1021 1021 return workingfilectx(self._repo, path, workingctx=self,
1022 1022 filelog=filelog)
1023 1023
1024 1024 def ancestor(self, c2):
1025 1025 """return the ancestor context of self and c2"""
1026 1026 return self._parents[0].ancestor(c2) # punt on two parents for now
1027 1027
1028 1028 def walk(self, match):
1029 1029 return sorted(self._repo.dirstate.walk(match, sorted(self.substate),
1030 1030 True, False))
1031 1031
1032 1032 def dirty(self, missing=False, merge=True, branch=True):
1033 1033 "check whether a working directory is modified"
1034 1034 # check subrepos first
1035 1035 for s in sorted(self.substate):
1036 1036 if self.sub(s).dirty():
1037 1037 return True
1038 1038 # check current working dir
1039 1039 return ((merge and self.p2()) or
1040 1040 (branch and self.branch() != self.p1().branch()) or
1041 1041 self.modified() or self.added() or self.removed() or
1042 1042 (missing and self.deleted()))
1043 1043
1044 1044 def add(self, list, prefix=""):
1045 1045 join = lambda f: os.path.join(prefix, f)
1046 1046 wlock = self._repo.wlock()
1047 1047 ui, ds = self._repo.ui, self._repo.dirstate
1048 1048 try:
1049 1049 rejected = []
1050 1050 for f in list:
1051 1051 scmutil.checkportable(ui, join(f))
1052 1052 p = self._repo.wjoin(f)
1053 1053 try:
1054 1054 st = os.lstat(p)
1055 1055 except OSError:
1056 1056 ui.warn(_("%s does not exist!\n") % join(f))
1057 1057 rejected.append(f)
1058 1058 continue
1059 1059 if st.st_size > 10000000:
1060 1060 ui.warn(_("%s: up to %d MB of RAM may be required "
1061 1061 "to manage this file\n"
1062 1062 "(use 'hg revert %s' to cancel the "
1063 1063 "pending addition)\n")
1064 1064 % (f, 3 * st.st_size // 1000000, join(f)))
1065 1065 if not (stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode)):
1066 1066 ui.warn(_("%s not added: only files and symlinks "
1067 1067 "supported currently\n") % join(f))
1068 1068 rejected.append(p)
1069 1069 elif ds[f] in 'amn':
1070 1070 ui.warn(_("%s already tracked!\n") % join(f))
1071 1071 elif ds[f] == 'r':
1072 1072 ds.normallookup(f)
1073 1073 else:
1074 1074 ds.add(f)
1075 1075 return rejected
1076 1076 finally:
1077 1077 wlock.release()
1078 1078
1079 1079 def forget(self, files, prefix=""):
1080 1080 join = lambda f: os.path.join(prefix, f)
1081 1081 wlock = self._repo.wlock()
1082 1082 try:
1083 1083 rejected = []
1084 1084 for f in files:
1085 1085 if f not in self._repo.dirstate:
1086 1086 self._repo.ui.warn(_("%s not tracked!\n") % join(f))
1087 1087 rejected.append(f)
1088 1088 elif self._repo.dirstate[f] != 'a':
1089 1089 self._repo.dirstate.remove(f)
1090 1090 else:
1091 1091 self._repo.dirstate.drop(f)
1092 1092 return rejected
1093 1093 finally:
1094 1094 wlock.release()
1095 1095
1096 1096 def ancestors(self):
1097 1097 for a in self._repo.changelog.ancestors(
1098 1098 [p.rev() for p in self._parents]):
1099 1099 yield changectx(self._repo, a)
1100 1100
1101 1101 def undelete(self, list):
1102 1102 pctxs = self.parents()
1103 1103 wlock = self._repo.wlock()
1104 1104 try:
1105 1105 for f in list:
1106 1106 if self._repo.dirstate[f] != 'r':
1107 1107 self._repo.ui.warn(_("%s not removed!\n") % f)
1108 1108 else:
1109 1109 fctx = f in pctxs[0] and pctxs[0][f] or pctxs[1][f]
1110 1110 t = fctx.data()
1111 1111 self._repo.wwrite(f, t, fctx.flags())
1112 1112 self._repo.dirstate.normal(f)
1113 1113 finally:
1114 1114 wlock.release()
1115 1115
1116 1116 def copy(self, source, dest):
1117 1117 p = self._repo.wjoin(dest)
1118 1118 if not os.path.lexists(p):
1119 1119 self._repo.ui.warn(_("%s does not exist!\n") % dest)
1120 1120 elif not (os.path.isfile(p) or os.path.islink(p)):
1121 1121 self._repo.ui.warn(_("copy failed: %s is not a file or a "
1122 1122 "symbolic link\n") % dest)
1123 1123 else:
1124 1124 wlock = self._repo.wlock()
1125 1125 try:
1126 1126 if self._repo.dirstate[dest] in '?r':
1127 1127 self._repo.dirstate.add(dest)
1128 1128 self._repo.dirstate.copy(source, dest)
1129 1129 finally:
1130 1130 wlock.release()
1131 1131
1132 1132 def markcommitted(self, node):
1133 1133 """Perform post-commit cleanup necessary after committing this ctx
1134 1134
1135 1135 Specifically, this updates backing stores this working context
1136 1136 wraps to reflect the fact that the changes reflected by this
1137 1137 workingctx have been committed. For example, it marks
1138 1138 modified and added files as normal in the dirstate.
1139 1139
1140 1140 """
1141 1141
1142 1142 for f in self.modified() + self.added():
1143 1143 self._repo.dirstate.normal(f)
1144 1144 for f in self.removed():
1145 1145 self._repo.dirstate.drop(f)
1146 1146 self._repo.dirstate.setparents(node)
1147 1147
1148 1148 def dirs(self):
1149 1149 return self._repo.dirstate.dirs()
1150 1150
1151 1151 class workingfilectx(filectx):
1152 1152 """A workingfilectx object makes access to data related to a particular
1153 1153 file in the working directory convenient."""
1154 1154 def __init__(self, repo, path, filelog=None, workingctx=None):
1155 1155         """path is the file's path relative to the repository root;
1156 1156         workingctx is the working directory context it belongs to."""
1157 1157 self._repo = repo
1158 1158 self._path = path
1159 1159 self._changeid = None
1160 1160 self._filerev = self._filenode = None
1161 1161
1162 1162 if filelog:
1163 1163 self._filelog = filelog
1164 1164 if workingctx:
1165 1165 self._changectx = workingctx
1166 1166
1167 1167 @propertycache
1168 1168 def _changectx(self):
1169 1169 return workingctx(self._repo)
1170 1170
1171 1171 def __nonzero__(self):
1172 1172 return True
1173 1173
1174 1174 def __str__(self):
1175 1175 return "%s@%s" % (self.path(), self._changectx)
1176 1176
1177 1177 def __repr__(self):
1178 1178 return "<workingfilectx %s>" % str(self)
1179 1179
1180 1180 def data(self):
1181 1181 return self._repo.wread(self._path)
1182 1182 def renamed(self):
1183 1183 rp = self._repo.dirstate.copied(self._path)
1184 1184 if not rp:
1185 1185 return None
1186 1186 return rp, self._changectx._parents[0]._manifest.get(rp, nullid)
1187 1187
1188 1188 def parents(self):
1189 1189 '''return parent filectxs, following copies if necessary'''
1190 1190 def filenode(ctx, path):
1191 1191 return ctx._manifest.get(path, nullid)
1192 1192
1193 1193 path = self._path
1194 1194 fl = self._filelog
1195 1195 pcl = self._changectx._parents
1196 1196 renamed = self.renamed()
1197 1197
1198 1198 if renamed:
1199 1199 pl = [renamed + (None,)]
1200 1200 else:
1201 1201 pl = [(path, filenode(pcl[0], path), fl)]
1202 1202
1203 1203 for pc in pcl[1:]:
1204 1204 pl.append((path, filenode(pc, path), fl))
1205 1205
1206 1206 return [filectx(self._repo, p, fileid=n, filelog=l)
1207 1207 for p, n, l in pl if n != nullid]
1208 1208
1209 1209 def children(self):
1210 1210 return []
1211 1211
1212 1212 def size(self):
1213 1213 return os.lstat(self._repo.wjoin(self._path)).st_size
1214 1214 def date(self):
1215 1215 t, tz = self._changectx.date()
1216 1216 try:
1217 1217 return (int(os.lstat(self._repo.wjoin(self._path)).st_mtime), tz)
1218 1218 except OSError, err:
1219 1219 if err.errno != errno.ENOENT:
1220 1220 raise
1221 1221 return (t, tz)
1222 1222
1223 1223 def cmp(self, fctx):
1224 1224 """compare with other file context
1225 1225
1226 1226         returns True if different from fctx.
1227 1227 """
1228 1228 # fctx should be a filectx (not a workingfilectx)
1229 1229 # invert comparison to reuse the same code path
1230 1230 return fctx.cmp(self)
1231 1231
1232 1232 class memctx(object):
1233 1233 """Use memctx to perform in-memory commits via localrepo.commitctx().
1234 1234
1235 1235     Revision information is supplied at initialization time, while
1236 1236     related file data is made available through a callback
1237 1237 mechanism. 'repo' is the current localrepo, 'parents' is a
1238 1238 sequence of two parent revisions identifiers (pass None for every
1239 1239 missing parent), 'text' is the commit message and 'files' lists
1240 1240 names of files touched by the revision (normalized and relative to
1241 1241 repository root).
1242 1242
1243 1243 filectxfn(repo, memctx, path) is a callable receiving the
1244 1244 repository, the current memctx object and the normalized path of
1245 1245 requested file, relative to repository root. It is fired by the
1246 1246 commit function for every file in 'files', but calls order is
1247 1247 undefined. If the file is available in the revision being
1248 1248 committed (updated or added), filectxfn returns a memfilectx
1249 1249 object. If the file was removed, filectxfn raises an
1250 1250 IOError. Moved files are represented by marking the source file
1251 1251 removed and the new file added with copy information (see
1252 1252 memfilectx).
1253 1253
1254 1254 user receives the committer name and defaults to current
1255 1255 repository username, date is the commit date in any format
1256 1256 supported by util.parsedate() and defaults to current date, extra
1257 1257 is a dictionary of metadata or is left empty.
1258 1258 """
1259 1259 def __init__(self, repo, parents, text, files, filectxfn, user=None,
1260 1260 date=None, extra=None):
1261 1261 self._repo = repo
1262 1262 self._rev = None
1263 1263 self._node = None
1264 1264 self._text = text
1265 1265 self._date = date and util.parsedate(date) or util.makedate()
1266 1266 self._user = user
1267 1267 parents = [(p or nullid) for p in parents]
1268 1268 p1, p2 = parents
1269 1269 self._parents = [changectx(self._repo, p) for p in (p1, p2)]
1270 1270 files = sorted(set(files))
1271 1271 self._status = [files, [], [], [], []]
1272 1272 self._filectxfn = filectxfn
1273 1273
1274 1274 self._extra = extra and extra.copy() or {}
1275 1275 if self._extra.get('branch', '') == '':
1276 1276 self._extra['branch'] = 'default'
1277 1277
1278 1278 def __str__(self):
1279 1279 return str(self._parents[0]) + "+"
1280 1280
1281 1281 def __int__(self):
1282 1282 return self._rev
1283 1283
1284 1284 def __nonzero__(self):
1285 1285 return True
1286 1286
1287 1287 def __getitem__(self, key):
1288 1288 return self.filectx(key)
1289 1289
1290 1290 def p1(self):
1291 1291 return self._parents[0]
1292 1292 def p2(self):
1293 1293 return self._parents[1]
1294 1294
1295 1295 def user(self):
1296 1296 return self._user or self._repo.ui.username()
1297 1297 def date(self):
1298 1298 return self._date
1299 1299 def description(self):
1300 1300 return self._text
1301 1301 def files(self):
1302 1302 return self.modified()
1303 1303 def modified(self):
1304 1304 return self._status[0]
1305 1305 def added(self):
1306 1306 return self._status[1]
1307 1307 def removed(self):
1308 1308 return self._status[2]
1309 1309 def deleted(self):
1310 1310 return self._status[3]
1311 1311 def unknown(self):
1312 1312 return self._status[4]
1313 1313 def ignored(self):
1314 1314 return self._status[5]
1315 1315 def clean(self):
1316 1316 return self._status[6]
1317 1317 def branch(self):
1318 1318 return encoding.tolocal(self._extra['branch'])
1319 1319 def extra(self):
1320 1320 return self._extra
1321 1321 def flags(self, f):
1322 1322 return self[f].flags()
1323 1323
1324 1324 def parents(self):
1325 1325 """return contexts for each parent changeset"""
1326 1326 return self._parents
1327 1327
1328 1328 def filectx(self, path, filelog=None):
1329 1329         """get a file context from the in-memory commit, via filectxfn"""
1330 1330 return self._filectxfn(self._repo, self, path)
1331 1331
1332 1332 def commit(self):
1333 1333 """commit context to the repo"""
1334 1334 return self._repo.commitctx(self)
1335 1335
1336 1336 class memfilectx(object):
1337 1337 """memfilectx represents an in-memory file to commit.
1338 1338
1339 1339 See memctx for more details.
1340 1340 """
1341 1341 def __init__(self, path, data, islink=False, isexec=False, copied=None):
1342 1342 """
1343 1343 path is the normalized file path relative to repository root.
1344 1344 data is the file content as a string.
1345 1345 islink is True if the file is a symbolic link.
1346 1346 isexec is True if the file is executable.
1347 1347 copied is the source file path if current file was copied in the
1348 1348 revision being committed, or None."""
1349 1349 self._path = path
1350 1350 self._data = data
1351 1351 self._flags = (islink and 'l' or '') + (isexec and 'x' or '')
1352 1352 self._copied = None
1353 1353 if copied:
1354 1354 self._copied = (copied, nullid)
1355 1355
1356 1356 def __nonzero__(self):
1357 1357 return True
1358 1358 def __str__(self):
1359 1359 return "%s@%s" % (self.path(), self._changectx)
1360 1360 def path(self):
1361 1361 return self._path
1362 1362 def data(self):
1363 1363 return self._data
1364 1364 def flags(self):
1365 1365 return self._flags
1366 1366 def isexec(self):
1367 1367 return 'x' in self._flags
1368 1368 def islink(self):
1369 1369 return 'l' in self._flags
1370 1370 def renamed(self):
1371 1371 return self._copied
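
A minimal usage sketch for the memctx/memfilectx pair above: build one in-memory file and commit it without touching the working directory. The repository object (`repo`, an open localrepository), the file name, and the contents are assumptions for illustration:

from mercurial.context import memctx, memfilectx

def getfilectx(repo, mctx, path):
    # called back once per entry in files; returns the file to commit
    return memfilectx(path, 'new contents\n')

ctx = memctx(repo, (repo['tip'].node(), None), 'example commit',
             ['a.txt'], getfilectx, user='someone <s@example.com>')
node = ctx.commit()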
@@ -1,1573 +1,1935 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static inline int hexdigit(const char *p, Py_ssize_t off)
18 18 {
19 19 char c = p[off];
20 20
21 21 if (c >= '0' && c <= '9')
22 22 return c - '0';
23 23 if (c >= 'a' && c <= 'f')
24 24 return c - 'a' + 10;
25 25 if (c >= 'A' && c <= 'F')
26 26 return c - 'A' + 10;
27 27
28 28 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
29 29 return 0;
30 30 }
31 31
32 32 /*
33 33 * Turn a hex-encoded string into binary.
34 34 */
35 35 static PyObject *unhexlify(const char *str, int len)
36 36 {
37 37 PyObject *ret;
38 38 char *d;
39 39 int i;
40 40
41 41 ret = PyBytes_FromStringAndSize(NULL, len / 2);
42 42
43 43 if (!ret)
44 44 return NULL;
45 45
46 46 d = PyBytes_AsString(ret);
47 47
48 48 for (i = 0; i < len;) {
49 49 int hi = hexdigit(str, i++);
50 50 int lo = hexdigit(str, i++);
51 51 *d++ = (hi << 4) | lo;
52 52 }
53 53
54 54 return ret;
55 55 }
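
For reference, the standard library already provides the pure-Python equivalent of unhexlify:

import binascii

assert binascii.unhexlify('0a1b') == '\x0a\x1b'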
56 56
57 57 /*
58 58 * This code assumes that a manifest is stitched together with newline
59 59 * ('\n') characters.
60 60 */
61 61 static PyObject *parse_manifest(PyObject *self, PyObject *args)
62 62 {
63 63 PyObject *mfdict, *fdict;
64 64 char *str, *cur, *start, *zero;
65 65 int len;
66 66
67 67 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
68 68 &PyDict_Type, &mfdict,
69 69 &PyDict_Type, &fdict,
70 70 &str, &len))
71 71 goto quit;
72 72
73 73 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
74 74 PyObject *file = NULL, *node = NULL;
75 75 PyObject *flags = NULL;
76 76 ptrdiff_t nlen;
77 77
78 78 if (!*cur) {
79 79 zero = cur;
80 80 continue;
81 81 }
82 82 else if (*cur != '\n')
83 83 continue;
84 84
85 85 if (!zero) {
86 86 PyErr_SetString(PyExc_ValueError,
87 87 "manifest entry has no separator");
88 88 goto quit;
89 89 }
90 90
91 91 file = PyBytes_FromStringAndSize(start, zero - start);
92 92
93 93 if (!file)
94 94 goto bail;
95 95
96 96 nlen = cur - zero - 1;
97 97
98 98 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
99 99 if (!node)
100 100 goto bail;
101 101
102 102 if (nlen > 40) {
103 103 flags = PyBytes_FromStringAndSize(zero + 41,
104 104 nlen - 40);
105 105 if (!flags)
106 106 goto bail;
107 107
108 108 if (PyDict_SetItem(fdict, file, flags) == -1)
109 109 goto bail;
110 110 }
111 111
112 112 if (PyDict_SetItem(mfdict, file, node) == -1)
113 113 goto bail;
114 114
115 115 start = cur + 1;
116 116 zero = NULL;
117 117
118 118 Py_XDECREF(flags);
119 119 Py_XDECREF(node);
120 120 Py_XDECREF(file);
121 121 continue;
122 122 bail:
123 123 Py_XDECREF(flags);
124 124 Py_XDECREF(node);
125 125 Py_XDECREF(file);
126 126 goto quit;
127 127 }
128 128
129 129 if (len > 0 && *(cur - 1) != '\n') {
130 130 PyErr_SetString(PyExc_ValueError,
131 131 "manifest contains trailing garbage");
132 132 goto quit;
133 133 }
134 134
135 135 Py_INCREF(Py_None);
136 136 return Py_None;
137 137 quit:
138 138 return NULL;
139 139 }
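
A pure-Python sketch of the same parse, assuming well-formed input (every entry is "<path>\0<40 hex digits><optional flags>\n", as the C code checks):

import binascii

def parse_manifest_py(data):
    mfdict, fdict = {}, {}
    for line in data.splitlines():
        path, rest = line.split('\0', 1)
        mfdict[path] = binascii.unhexlify(rest[:40])
        if len(rest) > 40:
            fdict[path] = rest[40:]  # flags, e.g. 'l' (symlink) or 'x' (exec)
    return mfdict, fdict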
140 140
141 141 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
142 142 {
143 143 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
144 144 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
145 145 char *str, *cur, *end, *cpos;
146 146 int state, mode, size, mtime;
147 147 unsigned int flen;
148 148 int len;
149 149
150 150 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
151 151 &PyDict_Type, &dmap,
152 152 &PyDict_Type, &cmap,
153 153 &str, &len))
154 154 goto quit;
155 155
156 156 /* read parents */
157 157 if (len < 40)
158 158 goto quit;
159 159
160 160 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
161 161 if (!parents)
162 162 goto quit;
163 163
164 164 /* read filenames */
165 165 cur = str + 40;
166 166 end = str + len;
167 167
168 168 while (cur < end - 17) {
169 169 /* unpack header */
170 170 state = *cur;
171 171 mode = getbe32(cur + 1);
172 172 size = getbe32(cur + 5);
173 173 mtime = getbe32(cur + 9);
174 174 flen = getbe32(cur + 13);
175 175 cur += 17;
176 176 if (cur + flen > end || cur + flen < cur) {
177 177 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
178 178 goto quit;
179 179 }
180 180
181 181 entry = Py_BuildValue("ciii", state, mode, size, mtime);
182 182 if (!entry)
183 183 goto quit;
184 184 PyObject_GC_UnTrack(entry); /* don't waste time with this */
185 185
186 186 cpos = memchr(cur, 0, flen);
187 187 if (cpos) {
188 188 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
189 189 cname = PyBytes_FromStringAndSize(cpos + 1,
190 190 flen - (cpos - cur) - 1);
191 191 if (!fname || !cname ||
192 192 PyDict_SetItem(cmap, fname, cname) == -1 ||
193 193 PyDict_SetItem(dmap, fname, entry) == -1)
194 194 goto quit;
195 195 Py_DECREF(cname);
196 196 } else {
197 197 fname = PyBytes_FromStringAndSize(cur, flen);
198 198 if (!fname ||
199 199 PyDict_SetItem(dmap, fname, entry) == -1)
200 200 goto quit;
201 201 }
202 202 cur += flen;
203 203 Py_DECREF(fname);
204 204 Py_DECREF(entry);
205 205 fname = cname = entry = NULL;
206 206 }
207 207
208 208 ret = parents;
209 209 Py_INCREF(ret);
210 210 quit:
211 211 Py_XDECREF(fname);
212 212 Py_XDECREF(cname);
213 213 Py_XDECREF(entry);
214 214 Py_XDECREF(parents);
215 215 return ret;
216 216 }
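
Each record decoded above is: one state byte, three big-endian 32-bit fields (mode, size, mtime), a 32-bit filename length, then the filename, optionally followed by '\0' and a copy source. A pure-Python sketch for well-formed input (the C code reads the middle fields as signed ints, so -1 shows up as 0xffffffff here):

import struct

def parse_dirstate_py(data):
    parents = (data[:20], data[20:40])
    dmap, copymap = {}, {}
    pos = 40
    while pos + 17 <= len(data):
        state, mode, size, mtime, flen = struct.unpack(
            '>cIIII', data[pos:pos + 17])
        pos += 17
        name = data[pos:pos + flen]
        pos += flen
        if '\0' in name:
            name, copysource = name.split('\0', 1)
            copymap[name] = copysource
        dmap[name] = (state, mode, size, mtime)
    return parents, dmap, copymap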
217 217
218 218 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
219 219 {
220 220 PyObject *o = PyTuple_GET_ITEM(tuple, off);
221 221 long val;
222 222
223 223 if (PyInt_Check(o))
224 224 val = PyInt_AS_LONG(o);
225 225 else if (PyLong_Check(o)) {
226 226 val = PyLong_AsLong(o);
227 227 if (val == -1 && PyErr_Occurred())
228 228 return -1;
229 229 } else {
230 230 PyErr_SetString(PyExc_TypeError, "expected an int or long");
231 231 return -1;
232 232 }
233 233 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
234 234 PyErr_SetString(PyExc_OverflowError,
235 235 "Python value to large to convert to uint32_t");
236 236 return -1;
237 237 }
238 238 *v = (uint32_t)val;
239 239 return 0;
240 240 }
241 241
242 242 static PyObject *dirstate_unset;
243 243
244 244 /*
245 245 * Efficiently pack a dirstate object into its on-disk format.
246 246 */
247 247 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
248 248 {
249 249 PyObject *packobj = NULL;
250 250 PyObject *map, *copymap, *pl;
251 251 Py_ssize_t nbytes, pos, l;
252 252 PyObject *k, *v, *pn;
253 253 char *p, *s;
254 254 double now;
255 255
256 256 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
257 257 &PyDict_Type, &map, &PyDict_Type, &copymap,
258 258 &pl, &now))
259 259 return NULL;
260 260
261 261 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
262 262 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
263 263 return NULL;
264 264 }
265 265
266 266 /* Figure out how much we need to allocate. */
267 267 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
268 268 PyObject *c;
269 269 if (!PyString_Check(k)) {
270 270 PyErr_SetString(PyExc_TypeError, "expected string key");
271 271 goto bail;
272 272 }
273 273 nbytes += PyString_GET_SIZE(k) + 17;
274 274 c = PyDict_GetItem(copymap, k);
275 275 if (c) {
276 276 if (!PyString_Check(c)) {
277 277 PyErr_SetString(PyExc_TypeError,
278 278 "expected string key");
279 279 goto bail;
280 280 }
281 281 nbytes += PyString_GET_SIZE(c) + 1;
282 282 }
283 283 }
284 284
285 285 packobj = PyString_FromStringAndSize(NULL, nbytes);
286 286 if (packobj == NULL)
287 287 goto bail;
288 288
289 289 p = PyString_AS_STRING(packobj);
290 290
291 291 pn = PySequence_ITEM(pl, 0);
292 292 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
293 293 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
294 294 goto bail;
295 295 }
296 296 memcpy(p, s, l);
297 297 p += 20;
298 298 pn = PySequence_ITEM(pl, 1);
299 299 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
300 300 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
301 301 goto bail;
302 302 }
303 303 memcpy(p, s, l);
304 304 p += 20;
305 305
306 306 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
307 307 uint32_t mode, size, mtime;
308 308 Py_ssize_t len, l;
309 309 PyObject *o;
310 310 char *s, *t;
311 311
312 312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
313 313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
314 314 goto bail;
315 315 }
316 316 o = PyTuple_GET_ITEM(v, 0);
317 317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
318 318 PyErr_SetString(PyExc_TypeError, "expected one byte");
319 319 goto bail;
320 320 }
321 321 *p++ = *s;
322 322 if (getintat(v, 1, &mode) == -1)
323 323 goto bail;
324 324 if (getintat(v, 2, &size) == -1)
325 325 goto bail;
326 326 if (getintat(v, 3, &mtime) == -1)
327 327 goto bail;
328 328 if (*s == 'n' && mtime == (uint32_t)now) {
329 329 /* See pure/parsers.py:pack_dirstate for why we do
330 330 * this. */
331 331 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
332 332 goto bail;
333 333 mode = 0, size = -1, mtime = -1;
334 334 }
335 335 putbe32(mode, p);
336 336 putbe32(size, p + 4);
337 337 putbe32(mtime, p + 8);
338 338 t = p + 12;
339 339 p += 16;
340 340 len = PyString_GET_SIZE(k);
341 341 memcpy(p, PyString_AS_STRING(k), len);
342 342 p += len;
343 343 o = PyDict_GetItem(copymap, k);
344 344 if (o) {
345 345 *p++ = '\0';
346 346 l = PyString_GET_SIZE(o);
347 347 memcpy(p, PyString_AS_STRING(o), l);
348 348 p += l;
349 349 len += l + 1;
350 350 }
351 351 putbe32((uint32_t)len, t);
352 352 }
353 353
354 354 pos = p - PyString_AS_STRING(packobj);
355 355 if (pos != nbytes) {
356 356 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
357 357 (long)pos, (long)nbytes);
358 358 goto bail;
359 359 }
360 360
361 361 return packobj;
362 362 bail:
363 363 Py_XDECREF(packobj);
364 364 return NULL;
365 365 }
366 366
367 367 /*
368 368 * A base-16 trie for fast node->rev mapping.
369 369 *
370 370 * Positive value is index of the next node in the trie
371 371 * Negative value is a leaf: -(rev + 1)
372 372 * Zero is empty
373 373 */
374 374 typedef struct {
375 375 int children[16];
376 376 } nodetree;
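
A pure-Python sketch of a lookup in this encoding (nt is a list of 16-entry child lists; as in nt_find below, a returned rev must still be verified against the full node, since leaves are keyed by a unique prefix only):

def nt_find_py(nt, node):
    off = 0
    for level in range(40):
        v = ord(node[level >> 1])
        k = v >> 4 if not (level & 1) else v & 0xf  # high nybble first
        child = nt[off][k]
        if child < 0:
            return -child - 1  # leaf: encoded as -(rev + 1)
        if child == 0:
            return None        # empty slot: not present
        off = child            # internal node: descend
    return None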
377 377
378 378 /*
379 379 * This class has two behaviours.
380 380 *
381 381 * When used in a list-like way (with integer keys), we decode an
382 382 * entry in a RevlogNG index file on demand. Our last entry is a
383 383 * sentinel, always a nullid. We have limited support for
384 384 * integer-keyed insert and delete, only at elements right before the
385 385 * sentinel.
386 386 *
387 387 * With string keys, we lazily perform a reverse mapping from node to
388 388 * rev, using a base-16 trie.
389 389 */
390 390 typedef struct {
391 391 PyObject_HEAD
392 392 /* Type-specific fields go here. */
393 393 PyObject *data; /* raw bytes of index */
394 394 PyObject **cache; /* cached tuples */
395 395 const char **offsets; /* populated on demand */
396 396 Py_ssize_t raw_length; /* original number of elements */
397 397 Py_ssize_t length; /* current number of elements */
398 398 PyObject *added; /* populated on demand */
399 399 PyObject *headrevs; /* cache, invalidated on changes */
400 400 nodetree *nt; /* base-16 trie */
401 401 int ntlength; /* # nodes in use */
402 402 int ntcapacity; /* # nodes allocated */
403 403 int ntdepth; /* maximum depth of tree */
404 404 int ntsplits; /* # splits performed */
405 405 int ntrev; /* last rev scanned */
406 406 int ntlookups; /* # lookups */
407 407 int ntmisses; /* # lookups that miss the cache */
408 408 int inlined;
409 409 } indexObject;
410 410
411 411 static Py_ssize_t index_length(const indexObject *self)
412 412 {
413 413 if (self->added == NULL)
414 414 return self->length;
415 415 return self->length + PyList_GET_SIZE(self->added);
416 416 }
417 417
418 418 static PyObject *nullentry;
419 419 static const char nullid[20];
420 420
421 421 static long inline_scan(indexObject *self, const char **offsets);
422 422
423 423 #if LONG_MAX == 0x7fffffffL
424 424 static char *tuple_format = "Kiiiiiis#";
425 425 #else
426 426 static char *tuple_format = "kiiiiiis#";
427 427 #endif
428 428
429 429 /* A RevlogNG v1 index entry is 64 bytes long. */
430 430 static const long v1_hdrsize = 64;
431 431
432 432 /*
433 433 * Return a pointer to the beginning of a RevlogNG record.
434 434 */
435 435 static const char *index_deref(indexObject *self, Py_ssize_t pos)
436 436 {
437 437 if (self->inlined && pos > 0) {
438 438 if (self->offsets == NULL) {
439 439 self->offsets = malloc(self->raw_length *
440 440 sizeof(*self->offsets));
441 441 if (self->offsets == NULL)
442 442 return (const char *)PyErr_NoMemory();
443 443 inline_scan(self, self->offsets);
444 444 }
445 445 return self->offsets[pos];
446 446 }
447 447
448 448 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
449 449 }
450 450
451 451 /*
452 452 * RevlogNG format (all in big endian, data may be inlined):
453 453 * 6 bytes: offset
454 454 * 2 bytes: flags
455 455 * 4 bytes: compressed length
456 456 * 4 bytes: uncompressed length
457 457 * 4 bytes: base revision
458 458 * 4 bytes: link revision
459 459 * 4 bytes: parent 1 revision
460 460 * 4 bytes: parent 2 revision
461 461 * 32 bytes: nodeid (only 20 bytes used)
462 462 */
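
This 64-byte layout matches revlog.py's indexformatng struct format ">Qiiiiii20s12x"; the 6-byte offset and 2-byte flags are read together as one big-endian 64-bit value, exactly as index_get does below. A decoding sketch:

import struct

def unpack_v1_record(record):
    (offset_flags, comp_len, uncomp_len, base_rev,
     link_rev, p1_rev, p2_rev, nodeid) = struct.unpack(
        '>Qiiiiii20s12x', record)
    return (offset_flags >> 16, offset_flags & 0xffff, comp_len,
            uncomp_len, base_rev, link_rev, p1_rev, p2_rev, nodeid)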
463 463 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
464 464 {
465 465 uint64_t offset_flags;
466 466 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
467 467 const char *c_node_id;
468 468 const char *data;
469 469 Py_ssize_t length = index_length(self);
470 470 PyObject *entry;
471 471
472 472 if (pos < 0)
473 473 pos += length;
474 474
475 475 if (pos < 0 || pos >= length) {
476 476 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
477 477 return NULL;
478 478 }
479 479
480 480 if (pos == length - 1) {
481 481 Py_INCREF(nullentry);
482 482 return nullentry;
483 483 }
484 484
485 485 if (pos >= self->length - 1) {
486 486 PyObject *obj;
487 487 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
488 488 Py_INCREF(obj);
489 489 return obj;
490 490 }
491 491
492 492 if (self->cache) {
493 493 if (self->cache[pos]) {
494 494 Py_INCREF(self->cache[pos]);
495 495 return self->cache[pos];
496 496 }
497 497 } else {
498 498 self->cache = calloc(self->raw_length, sizeof(PyObject *));
499 499 if (self->cache == NULL)
500 500 return PyErr_NoMemory();
501 501 }
502 502
503 503 data = index_deref(self, pos);
504 504 if (data == NULL)
505 505 return NULL;
506 506
507 507 offset_flags = getbe32(data + 4);
508 508 if (pos == 0) /* mask out version number for the first entry */
509 509 offset_flags &= 0xFFFF;
510 510 else {
511 511 uint32_t offset_high = getbe32(data);
512 512 offset_flags |= ((uint64_t)offset_high) << 32;
513 513 }
514 514
515 515 comp_len = getbe32(data + 8);
516 516 uncomp_len = getbe32(data + 12);
517 517 base_rev = getbe32(data + 16);
518 518 link_rev = getbe32(data + 20);
519 519 parent_1 = getbe32(data + 24);
520 520 parent_2 = getbe32(data + 28);
521 521 c_node_id = data + 32;
522 522
523 523 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
524 524 uncomp_len, base_rev, link_rev,
525 525 parent_1, parent_2, c_node_id, 20);
526 526
527 527 if (entry) {
528 528 PyObject_GC_UnTrack(entry);
529 529 Py_INCREF(entry);
530 530 }
531 531 self->cache[pos] = entry;
532 532
533 533 return entry;
534 534 }
535 535
536 536 /*
537 537 * Return the 20-byte SHA of the node corresponding to the given rev.
538 538 */
539 539 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 540 {
541 541 Py_ssize_t length = index_length(self);
542 542 const char *data;
543 543
544 544 if (pos == length - 1 || pos == INT_MAX)
545 545 return nullid;
546 546
547 547 if (pos >= length)
548 548 return NULL;
549 549
550 550 if (pos >= self->length - 1) {
551 551 PyObject *tuple, *str;
552 552 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
553 553 str = PyTuple_GetItem(tuple, 7);
554 554 return str ? PyString_AS_STRING(str) : NULL;
555 555 }
556 556
557 557 data = index_deref(self, pos);
558 558 return data ? data + 32 : NULL;
559 559 }
560 560
561 561 static int nt_insert(indexObject *self, const char *node, int rev);
562 562
563 563 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
564 564 {
565 565 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
566 566 return -1;
567 567 if (*nodelen == 20)
568 568 return 0;
569 569 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
570 570 return -1;
571 571 }
572 572
573 573 static PyObject *index_insert(indexObject *self, PyObject *args)
574 574 {
575 575 PyObject *obj;
576 576 char *node;
577 577 long offset;
578 578 Py_ssize_t len, nodelen;
579 579
580 580 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
581 581 return NULL;
582 582
583 583 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
584 584 PyErr_SetString(PyExc_TypeError, "8-tuple required");
585 585 return NULL;
586 586 }
587 587
588 588 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
589 589 return NULL;
590 590
591 591 len = index_length(self);
592 592
593 593 if (offset < 0)
594 594 offset += len;
595 595
596 596 if (offset != len - 1) {
597 597 PyErr_SetString(PyExc_IndexError,
598 598 "insert only supported at index -1");
599 599 return NULL;
600 600 }
601 601
602 602 if (offset > INT_MAX) {
603 603 PyErr_SetString(PyExc_ValueError,
604 604 "currently only 2**31 revs supported");
605 605 return NULL;
606 606 }
607 607
608 608 if (self->added == NULL) {
609 609 self->added = PyList_New(0);
610 610 if (self->added == NULL)
611 611 return NULL;
612 612 }
613 613
614 614 if (PyList_Append(self->added, obj) == -1)
615 615 return NULL;
616 616
617 617 if (self->nt)
618 618 nt_insert(self, node, (int)offset);
619 619
620 620 Py_CLEAR(self->headrevs);
621 621 Py_RETURN_NONE;
622 622 }
623 623
624 624 static void _index_clearcaches(indexObject *self)
625 625 {
626 626 if (self->cache) {
627 627 Py_ssize_t i;
628 628
629 629 for (i = 0; i < self->raw_length; i++)
630 630 Py_CLEAR(self->cache[i]);
631 631 free(self->cache);
632 632 self->cache = NULL;
633 633 }
634 634 if (self->offsets) {
635 635 free(self->offsets);
636 636 self->offsets = NULL;
637 637 }
638 638 if (self->nt) {
639 639 free(self->nt);
640 640 self->nt = NULL;
641 641 }
642 642 Py_CLEAR(self->headrevs);
643 643 }
644 644
645 645 static PyObject *index_clearcaches(indexObject *self)
646 646 {
647 647 _index_clearcaches(self);
648 648 self->ntlength = self->ntcapacity = 0;
649 649 self->ntdepth = self->ntsplits = 0;
650 650 self->ntrev = -1;
651 651 self->ntlookups = self->ntmisses = 0;
652 652 Py_RETURN_NONE;
653 653 }
654 654
655 655 static PyObject *index_stats(indexObject *self)
656 656 {
657 657 PyObject *obj = PyDict_New();
658 658
659 659 if (obj == NULL)
660 660 return NULL;
661 661
662 662 #define istat(__n, __d) \
663 663 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
664 664 goto bail;
665 665
666 666 if (self->added) {
667 667 Py_ssize_t len = PyList_GET_SIZE(self->added);
668 668 if (PyDict_SetItemString(obj, "index entries added",
669 669 PyInt_FromSsize_t(len)) == -1)
670 670 goto bail;
671 671 }
672 672
673 673 if (self->raw_length != self->length - 1)
674 674 istat(raw_length, "revs on disk");
675 675 istat(length, "revs in memory");
676 676 istat(ntcapacity, "node trie capacity");
677 677 istat(ntdepth, "node trie depth");
678 678 istat(ntlength, "node trie count");
679 679 istat(ntlookups, "node trie lookups");
680 680 istat(ntmisses, "node trie misses");
681 681 istat(ntrev, "node trie last rev scanned");
682 682 istat(ntsplits, "node trie splits");
683 683
684 684 #undef istat
685 685
686 686 return obj;
687 687
688 688 bail:
689 689 Py_XDECREF(obj);
690 690 return NULL;
691 691 }
692 692
693 693 /*
694 694 * When we cache a list, we want to be sure the caller can't mutate
695 695 * the cached copy.
696 696 */
697 697 static PyObject *list_copy(PyObject *list)
698 698 {
699 699 Py_ssize_t len = PyList_GET_SIZE(list);
700 700 PyObject *newlist = PyList_New(len);
701 701 Py_ssize_t i;
702 702
703 703 if (newlist == NULL)
704 704 return NULL;
705 705
706 706 for (i = 0; i < len; i++) {
707 707 PyObject *obj = PyList_GET_ITEM(list, i);
708 708 Py_INCREF(obj);
709 709 PyList_SET_ITEM(newlist, i, obj);
710 710 }
711 711
712 712 return newlist;
713 713 }
714 714
715 715 static PyObject *index_headrevs(indexObject *self)
716 716 {
717 717 Py_ssize_t i, len, addlen;
718 718 char *nothead = NULL;
719 719 PyObject *heads;
720 720
721 721 if (self->headrevs)
722 722 return list_copy(self->headrevs);
723 723
724 724 len = index_length(self) - 1;
725 725 heads = PyList_New(0);
726 726 if (heads == NULL)
727 727 goto bail;
728 728 if (len == 0) {
729 729 PyObject *nullid = PyInt_FromLong(-1);
730 730 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
731 731 Py_XDECREF(nullid);
732 732 goto bail;
733 733 }
734 734 goto done;
735 735 }
736 736
737 737 nothead = calloc(len, 1);
738 738 if (nothead == NULL)
739 739 goto bail;
740 740
741 741 for (i = 0; i < self->raw_length; i++) {
742 742 const char *data = index_deref(self, i);
743 743 int parent_1 = getbe32(data + 24);
744 744 int parent_2 = getbe32(data + 28);
745 745 if (parent_1 >= 0)
746 746 nothead[parent_1] = 1;
747 747 if (parent_2 >= 0)
748 748 nothead[parent_2] = 1;
749 749 }
750 750
751 751 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
752 752
753 753 for (i = 0; i < addlen; i++) {
754 754 PyObject *rev = PyList_GET_ITEM(self->added, i);
755 755 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
756 756 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
757 757 long parent_1, parent_2;
758 758
759 759 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
760 760 PyErr_SetString(PyExc_TypeError,
761 761 "revlog parents are invalid");
762 762 goto bail;
763 763 }
764 764 parent_1 = PyInt_AS_LONG(p1);
765 765 parent_2 = PyInt_AS_LONG(p2);
766 766 if (parent_1 >= 0)
767 767 nothead[parent_1] = 1;
768 768 if (parent_2 >= 0)
769 769 nothead[parent_2] = 1;
770 770 }
771 771
772 772 for (i = 0; i < len; i++) {
773 773 PyObject *head;
774 774
775 775 if (nothead[i])
776 776 continue;
777 777 head = PyInt_FromLong(i);
778 778 if (head == NULL || PyList_Append(heads, head) == -1) {
779 779 Py_XDECREF(head);
780 780 goto bail;
781 781 }
782 782 }
783 783
784 784 done:
785 785 self->headrevs = heads;
786 786 free(nothead);
787 787 return list_copy(self->headrevs);
788 788 bail:
789 789 Py_XDECREF(heads);
790 790 free(nothead);
791 791 return NULL;
792 792 }
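
The semantics are simple even though the C version is tuned: a rev is a head unless some other rev lists it as a parent. A naive equivalent, where parentsfn(rev) returns the (p1, p2) rev pair:

def headrevs_py(parentsfn, nrevs):
    nothead = set()
    for rev in range(nrevs):
        for p in parentsfn(rev):
            if p >= 0:
                nothead.add(p)
    heads = [rev for rev in range(nrevs) if rev not in nothead]
    return heads or [-1]  # empty repo: nullrev is the only head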
793 793
794 794 static inline int nt_level(const char *node, Py_ssize_t level)
795 795 {
796 796 int v = node[level>>1];
797 797 if (!(level & 1))
798 798 v >>= 4;
799 799 return v & 0xf;
800 800 }
801 801
802 802 /*
803 803 * Return values:
804 804 *
805 805 * -4: match is ambiguous (multiple candidates)
806 806 * -2: not found
807 807 * rest: valid rev
808 808 */
809 809 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
810 810 int hex)
811 811 {
812 812 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
813 813 int level, maxlevel, off;
814 814
815 815 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
816 816 return -1;
817 817
818 818 if (self->nt == NULL)
819 819 return -2;
820 820
821 821 if (hex)
822 822 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
823 823 else
824 824 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
825 825
826 826 for (level = off = 0; level < maxlevel; level++) {
827 827 int k = getnybble(node, level);
828 828 nodetree *n = &self->nt[off];
829 829 int v = n->children[k];
830 830
831 831 if (v < 0) {
832 832 const char *n;
833 833 Py_ssize_t i;
834 834
835 835 v = -v - 1;
836 836 n = index_node(self, v);
837 837 if (n == NULL)
838 838 return -2;
839 839 for (i = level; i < maxlevel; i++)
840 840 if (getnybble(node, i) != nt_level(n, i))
841 841 return -2;
842 842 return v;
843 843 }
844 844 if (v == 0)
845 845 return -2;
846 846 off = v;
847 847 }
848 848 /* multiple matches against an ambiguous prefix */
849 849 return -4;
850 850 }
851 851
852 852 static int nt_new(indexObject *self)
853 853 {
854 854 if (self->ntlength == self->ntcapacity) {
855 855 nodetree *tmp;
856 856 self->ntcapacity *= 2;
857 857 tmp = realloc(self->nt,
858 858 self->ntcapacity * sizeof(nodetree));
859 859 if (tmp == NULL) {
860 860 PyErr_SetString(PyExc_MemoryError, "out of memory");
861 861 return -1;
862 862 }
863 863 self->nt = tmp;
862 862 memset(&self->nt[self->ntlength], 0,
863 863 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
864 864 }
865 865 return self->ntlength++;
866 866 }
867 867
868 868 static int nt_insert(indexObject *self, const char *node, int rev)
869 869 {
870 870 int level = 0;
871 871 int off = 0;
872 872
873 873 while (level < 40) {
874 874 int k = nt_level(node, level);
875 875 nodetree *n;
876 876 int v;
877 877
878 878 n = &self->nt[off];
879 879 v = n->children[k];
880 880
881 881 if (v == 0) {
882 882 n->children[k] = -rev - 1;
883 883 return 0;
884 884 }
885 885 if (v < 0) {
886 886 const char *oldnode = index_node(self, -v - 1);
887 887 int noff;
888 888
889 889 if (!oldnode || !memcmp(oldnode, node, 20)) {
890 890 n->children[k] = -rev - 1;
891 891 return 0;
892 892 }
893 893 noff = nt_new(self);
894 894 if (noff == -1)
895 895 return -1;
896 896 /* self->nt may have been changed by realloc */
897 897 self->nt[off].children[k] = noff;
898 898 off = noff;
899 899 n = &self->nt[off];
900 900 n->children[nt_level(oldnode, ++level)] = v;
901 901 if (level > self->ntdepth)
902 902 self->ntdepth = level;
903 903 self->ntsplits += 1;
904 904 } else {
905 905 level += 1;
906 906 off = v;
907 907 }
908 908 }
909 909
910 910 return -1;
911 911 }
912 912
913 913 static int nt_init(indexObject *self)
914 914 {
915 915 if (self->nt == NULL) {
916 916 self->ntcapacity = self->raw_length < 4
917 917 ? 4 : self->raw_length / 2;
918 918 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
919 919 if (self->nt == NULL) {
920 920 PyErr_NoMemory();
921 921 return -1;
922 922 }
923 923 self->ntlength = 1;
924 924 self->ntrev = (int)index_length(self) - 1;
925 925 self->ntlookups = 1;
926 926 self->ntmisses = 0;
927 927 if (nt_insert(self, nullid, INT_MAX) == -1)
928 928 return -1;
929 929 }
930 930 return 0;
931 931 }
932 932
933 933 /*
934 934 * Return values:
935 935 *
936 936 * -3: error (exception set)
937 937 * -2: not found (no exception set)
938 938 * rest: valid rev
939 939 */
940 940 static int index_find_node(indexObject *self,
941 941 const char *node, Py_ssize_t nodelen)
942 942 {
943 943 int rev;
944 944
945 945 self->ntlookups++;
946 946 rev = nt_find(self, node, nodelen, 0);
947 947 if (rev >= -1)
948 948 return rev;
949 949
950 950 if (nt_init(self) == -1)
951 951 return -3;
952 952
953 953 /*
954 954 * For the first handful of lookups, we scan the entire index,
955 955 * and cache only the matching nodes. This optimizes for cases
956 956 * like "hg tip", where only a few nodes are accessed.
957 957 *
958 958 * After that, we cache every node we visit, using a single
959 959 * scan amortized over multiple lookups. This gives the best
960 960 * bulk performance, e.g. for "hg log".
961 961 */
962 962 if (self->ntmisses++ < 4) {
963 963 for (rev = self->ntrev - 1; rev >= 0; rev--) {
964 964 const char *n = index_node(self, rev);
965 965 if (n == NULL)
966 966 return -2;
967 967 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
968 968 if (nt_insert(self, n, rev) == -1)
969 969 return -3;
970 970 break;
971 971 }
972 972 }
973 973 } else {
974 974 for (rev = self->ntrev - 1; rev >= 0; rev--) {
975 975 const char *n = index_node(self, rev);
976 976 if (n == NULL) {
977 977 self->ntrev = rev + 1;
978 978 return -2;
979 979 }
980 980 if (nt_insert(self, n, rev) == -1) {
981 981 self->ntrev = rev + 1;
982 982 return -3;
983 983 }
984 984 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
985 985 break;
986 986 }
987 987 }
988 988 self->ntrev = rev;
989 989 }
990 990
991 991 if (rev >= 0)
992 992 return rev;
993 993 return -2;
994 994 }
995 995
996 996 static PyObject *raise_revlog_error(void)
997 997 {
998 998 static PyObject *errclass;
999 999 PyObject *mod = NULL, *errobj;
1000 1000
1001 1001 if (errclass == NULL) {
1002 1002 PyObject *dict;
1003 1003
1004 1004 mod = PyImport_ImportModule("mercurial.error");
1005 1005 if (mod == NULL)
1006 1006 goto classfail;
1007 1007
1008 1008 dict = PyModule_GetDict(mod);
1009 1009 if (dict == NULL)
1010 1010 goto classfail;
1011 1011
1012 1012 errclass = PyDict_GetItemString(dict, "RevlogError");
1013 1013 if (errclass == NULL) {
1014 1014 PyErr_SetString(PyExc_SystemError,
1015 1015 "could not find RevlogError");
1016 1016 goto classfail;
1017 1017 }
1018 1018 Py_INCREF(errclass);
1019 1019 }
1020 1020
1021 1021 errobj = PyObject_CallFunction(errclass, NULL);
1022 1022 if (errobj == NULL)
1023 1023 return NULL;
1024 1024 PyErr_SetObject(errclass, errobj);
1025 1025 return errobj;
1026 1026
1027 1027 classfail:
1028 1028 Py_XDECREF(mod);
1029 1029 return NULL;
1030 1030 }
1031 1031
1032 1032 static PyObject *index_getitem(indexObject *self, PyObject *value)
1033 1033 {
1034 1034 char *node;
1035 1035 Py_ssize_t nodelen;
1036 1036 int rev;
1037 1037
1038 1038 if (PyInt_Check(value))
1039 1039 return index_get(self, PyInt_AS_LONG(value));
1040 1040
1041 1041 if (node_check(value, &node, &nodelen) == -1)
1042 1042 return NULL;
1043 1043 rev = index_find_node(self, node, nodelen);
1044 1044 if (rev >= -1)
1045 1045 return PyInt_FromLong(rev);
1046 1046 if (rev == -2)
1047 1047 raise_revlog_error();
1048 1048 return NULL;
1049 1049 }
1050 1050
1051 1051 static int nt_partialmatch(indexObject *self, const char *node,
1052 1052 Py_ssize_t nodelen)
1053 1053 {
1054 1054 int rev;
1055 1055
1056 1056 if (nt_init(self) == -1)
1057 1057 return -3;
1058 1058
1059 1059 if (self->ntrev > 0) {
1060 1060 /* ensure that the radix tree is fully populated */
1061 1061 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1062 1062 const char *n = index_node(self, rev);
1063 1063 if (n == NULL)
1064 1064 return -2;
1065 1065 if (nt_insert(self, n, rev) == -1)
1066 1066 return -3;
1067 1067 }
1068 1068 self->ntrev = rev;
1069 1069 }
1070 1070
1071 1071 return nt_find(self, node, nodelen, 1);
1072 1072 }
1073 1073
1074 1074 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1075 1075 {
1076 1076 const char *fullnode;
1077 1077 int nodelen;
1078 1078 char *node;
1079 1079 int rev, i;
1080 1080
1081 1081 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1082 1082 return NULL;
1083 1083
1084 1084 if (nodelen < 4) {
1085 1085 PyErr_SetString(PyExc_ValueError, "key too short");
1086 1086 return NULL;
1087 1087 }
1088 1088
1089 1089 if (nodelen > 40) {
1090 1090 PyErr_SetString(PyExc_ValueError, "key too long");
1091 1091 return NULL;
1092 1092 }
1093 1093
1094 1094 for (i = 0; i < nodelen; i++)
1095 1095 hexdigit(node, i);
1096 1096 if (PyErr_Occurred()) {
1097 1097 /* input contains non-hex characters */
1098 1098 PyErr_Clear();
1099 1099 Py_RETURN_NONE;
1100 1100 }
1101 1101
1102 1102 rev = nt_partialmatch(self, node, nodelen);
1103 1103
1104 1104 switch (rev) {
1105 1105 case -4:
1106 1106 raise_revlog_error();
1107 1107 case -3:
1108 1108 return NULL;
1109 1109 case -2:
1110 1110 Py_RETURN_NONE;
1111 1111 case -1:
1112 1112 return PyString_FromStringAndSize(nullid, 20);
1113 1113 }
1114 1114
1115 1115 fullnode = index_node(self, rev);
1116 1116 if (fullnode == NULL) {
1117 1117 PyErr_Format(PyExc_IndexError,
1118 1118 "could not access rev %d", rev);
1119 1119 return NULL;
1120 1120 }
1121 1121 return PyString_FromStringAndSize(fullnode, 20);
1122 1122 }
1123 1123
1124 1124 static PyObject *index_m_get(indexObject *self, PyObject *args)
1125 1125 {
1126 1126 Py_ssize_t nodelen;
1127 1127 PyObject *val;
1128 1128 char *node;
1129 1129 int rev;
1130 1130
1131 1131 if (!PyArg_ParseTuple(args, "O", &val))
1132 1132 return NULL;
1133 1133 if (node_check(val, &node, &nodelen) == -1)
1134 1134 return NULL;
1135 1135 rev = index_find_node(self, node, nodelen);
1136 1136 if (rev == -3)
1137 1137 return NULL;
1138 1138 if (rev == -2)
1139 1139 Py_RETURN_NONE;
1140 1140 return PyInt_FromLong(rev);
1141 1141 }
1142 1142
1143 1143 static int index_contains(indexObject *self, PyObject *value)
1144 1144 {
1145 1145 char *node;
1146 1146 Py_ssize_t nodelen;
1147 1147
1148 1148 if (PyInt_Check(value)) {
1149 1149 long rev = PyInt_AS_LONG(value);
1150 1150 return rev >= -1 && rev < index_length(self);
1151 1151 }
1152 1152
1153 1153 if (node_check(value, &node, &nodelen) == -1)
1154 1154 return -1;
1155 1155
1156 1156 switch (index_find_node(self, node, nodelen)) {
1157 1157 case -3:
1158 1158 return -1;
1159 1159 case -2:
1160 1160 return 0;
1161 1161 default:
1162 1162 return 1;
1163 1163 }
1164 1164 }
1165 1165
1166 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1167 {
1168 if (rev >= self->length - 1) {
1169 PyObject *tuple = PyList_GET_ITEM(self->added,
1170 rev - self->length + 1);
1171 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1172 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1173 } else {
1174 const char *data = index_deref(self, rev);
1175 ps[0] = getbe32(data + 24);
1176 ps[1] = getbe32(data + 28);
1177 }
1178 }
1179
1180 typedef uint64_t bitmask;
1181
1182 /*
1183 * Given a disjoint set of revs, return all candidates for the
1184 * greatest common ancestor. In revset notation, this is the set
1185 * "heads(::a and ::b and ...)"
1186 */
1187 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1188 int revcount)
1189 {
1190 const bitmask allseen = (1ull << revcount) - 1;
1191 const bitmask poison = 1ull << revcount;
1192 PyObject *gca = PyList_New(0);
1193 int i, v, interesting, left;
1194 int maxrev = -1;
1195 bitmask *seen;
1196
1197 for (i = 0; i < revcount; i++) {
1198 if (revs[i] > maxrev)
1199 maxrev = revs[i];
1200 }
1201
1202 seen = calloc(sizeof(*seen), maxrev + 1);
1203 if (seen == NULL)
1204 return PyErr_NoMemory();
1205
1206 for (i = 0; i < revcount; i++)
1207 seen[revs[i]] = 1ull << i;
1208
1209 interesting = left = revcount;
1210
1211 for (v = maxrev; v >= 0 && interesting; v--) {
1212 bitmask sv = seen[v];
1213 int parents[2];
1214
1215 if (!sv)
1216 continue;
1217
1218 if (sv < poison) {
1219 interesting -= 1;
1220 if (sv == allseen) {
1221 PyObject *obj = PyInt_FromLong(v);
1222 if (obj == NULL)
1223 goto bail;
1224 if (PyList_Append(gca, obj) == -1) {
1225 Py_DECREF(obj);
1226 goto bail;
1227 }
1228 sv |= poison;
1229 for (i = 0; i < revcount; i++) {
1230 if (revs[i] == v) {
1231 if (--left <= 1)
1232 goto done;
1233 break;
1234 }
1235 }
1236 }
1237 }
1238 index_get_parents(self, v, parents);
1239
1240 for (i = 0; i < 2; i++) {
1241 int p = parents[i];
1242 bitmask sp;
1243 if (p == -1)
1244 continue;
1245 sp = seen[p];
1245 if (sv < poison) {
1246 if (sp == 0) {
1247 seen[p] = sv;
1248 interesting++;
1249 }
1250 else if (sp != sv)
1251 seen[p] |= sv;
1252 } else {
1253 if (sp && sp < poison)
1254 interesting--;
1255 seen[p] = sv;
1256 }
1257 }
1258 }
1259
1260 done:
1261 free(seen);
1262 return gca;
1263 bail:
1264 free(seen);
1265 Py_XDECREF(gca);
1266 return NULL;
1267 }
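
A naive reference for the same set, handy for checking the bitmask bookkeeping above: compute each rev's full ancestor set (::r includes r itself), intersect, and keep only the heads, i.e. members that are not parents of other members:

def gca_candidates_py(parentsfn, revs):
    def ancestors(r):
        seen, stack = set(), [r]
        while stack:
            v = stack.pop()
            if v != -1 and v not in seen:
                seen.add(v)
                stack.extend(parentsfn(v))
        return seen

    common = None
    for r in revs:
        common = ancestors(r) if common is None else common & ancestors(r)
    inner = set(p for v in common for p in parentsfn(v) if p in common)
    return common - inner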
1268
1269 /*
1270 * Given a disjoint set of revs, return the subset with the longest
1271 * path to the root.
1272 */
1273 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1274 {
1275 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1276 static const Py_ssize_t capacity = 24;
1277 int *depth, *interesting = NULL;
1278 int i, j, v, ninteresting;
1279 PyObject *dict = NULL, *keys;
1280 long *seen = NULL;
1281 int maxrev = -1;
1282 long final;
1283
1284 if (revcount > capacity) {
1285 PyErr_Format(PyExc_OverflowError,
1286 "bitset size (%ld) > capacity (%ld)",
1287 (long)revcount, (long)capacity);
1288 return NULL;
1289 }
1290
1291 for (i = 0; i < revcount; i++) {
1292 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1293 if (n > maxrev)
1294 maxrev = n;
1295 }
1296
1297 depth = calloc(sizeof(*depth), maxrev + 1);
1298 if (depth == NULL)
1299 return PyErr_NoMemory();
1300
1301 seen = calloc(sizeof(*seen), maxrev + 1);
1302 if (seen == NULL) {
1303 PyErr_NoMemory();
1304 goto bail;
1305 }
1306
1307 interesting = calloc(sizeof(*interesting), 2 << revcount);
1308 if (interesting == NULL) {
1309 PyErr_NoMemory();
1310 goto bail;
1311 }
1312
1313 for (i = 0; i < revcount; i++) {
1314 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1315 long b = 1l << i;
1316 depth[n] = 1;
1317 seen[n] = b;
1318 interesting[b] = 1;
1319 }
1320
1321 ninteresting = (int)revcount;
1322
1323 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1324 int dv = depth[v];
1325 int parents[2];
1326 long sv;
1327
1328 if (dv == 0)
1329 continue;
1330
1331 sv = seen[v];
1332 index_get_parents(self, v, parents);
1333
1334 for (i = 0; i < 2; i++) {
1335 int p = parents[i];
1336 long nsp, sp;
1337 int dp;
1338
1339 if (p == -1)
1340 continue;
1341
1342 dp = depth[p];
1343 nsp = sp = seen[p];
1344 if (dp <= dv) {
1345 depth[p] = dv + 1;
1346 if (sp != sv) {
1347 interesting[sv] += 1;
1348 nsp = seen[p] = sv;
1349 if (sp) {
1350 interesting[sp] -= 1;
1351 if (interesting[sp] == 0)
1352 ninteresting -= 1;
1353 }
1354 }
1355 }
1356 else if (dv == dp - 1) {
1357 nsp = sp | sv;
1358 if (nsp == sp)
1359 continue;
1360 seen[p] = nsp;
1361 interesting[nsp] += 1;
1362 interesting[sp] -= 1;
1363 if (interesting[sp] == 0)
1364 ninteresting -= 1;
1365 }
1366 }
1367 interesting[sv] -= 1;
1368 if (interesting[sv] == 0)
1369 ninteresting -= 1;
1370 }
1371
1372 final = 0;
1373 j = ninteresting;
1374 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1375 if (interesting[i] == 0)
1376 continue;
1377 final |= i;
1378 j -= 1;
1379 }
1380 if (final == 0) {
1381 free(depth);
1382 free(seen);
1383 free(interesting);
1384 return PyList_New(0);
1385 }
1382
1383 dict = PyDict_New();
1384 if (dict == NULL)
1385 goto bail;
1386
1387 j = ninteresting;
1388 for (i = 0; i < revcount && j > 0; i++) {
1389 PyObject *key;
1390
1391 if ((final & (1 << i)) == 0)
1392 continue;
1393
1394 key = PyList_GET_ITEM(revs, i);
1395 Py_INCREF(key);
1396 Py_INCREF(Py_None);
1397 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1398 Py_DECREF(key);
1399 Py_DECREF(Py_None);
1400 goto bail;
1401 }
1402 j -= 1;
1403 }
1404
1405 keys = PyDict_Keys(dict);
1406
1407 free(depth);
1408 free(seen);
1409 free(interesting);
1410 Py_DECREF(dict);
1411
1412 return keys;
1413 bail:
1414 free(depth);
1415 free(seen);
1416 free(interesting);
1417 Py_XDECREF(dict);
1418
1419 return NULL;
1420 }
1421
1422 /*
1423 * Given a (possibly overlapping) set of revs, return the greatest
1424 * common ancestors: those with the longest path to the root.
1425 */
1426 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1427 {
1428 PyObject *ret = NULL, *gca = NULL;
1429 Py_ssize_t argcount, i, len;
1430 bitmask repeat = 0;
1431 int revcount = 0;
1432 int *revs;
1433
1434 argcount = PySequence_Length(args);
1435 revs = malloc(argcount * sizeof(*revs));
1436 if (argcount > 0 && revs == NULL)
1437 return PyErr_NoMemory();
1438 len = index_length(self) - 1;
1439
1440 for (i = 0; i < argcount; i++) {
1441 static const int capacity = 24;
1442 PyObject *obj = PySequence_GetItem(args, i);
1443 bitmask x;
1444 long val;
1445
1446 if (!PyInt_Check(obj)) {
1447 PyErr_SetString(PyExc_TypeError,
1448 "arguments must all be ints");
1449 goto bail;
1450 }
1451 val = PyInt_AsLong(obj);
1452 if (val == -1) {
1453 ret = PyList_New(0);
1454 goto done;
1455 }
1456 if (val < 0 || val >= len) {
1457 PyErr_SetString(PyExc_IndexError,
1458 "index out of range");
1459 goto bail;
1460 }
1461 /* this cheesy bloom filter lets us avoid some more
1462 * expensive duplicate checks in the common set-is-disjoint
1463 * case */
1464 x = 1ull << (val & 0x3f);
1465 if (repeat & x) {
1466 int k;
1467 for (k = 0; k < revcount; k++) {
1468 if (val == revs[k])
1469 goto duplicate;
1470 }
1471 }
1472 else repeat |= x;
1473 if (revcount >= capacity) {
1474 PyErr_Format(PyExc_OverflowError,
1475 "bitset size (%d) > capacity (%d)",
1476 revcount, capacity);
1477 goto bail;
1478 }
1479 revs[revcount++] = (int)val;
1480 duplicate:;
1481 }
1482
1483 if (revcount == 0) {
1484 ret = PyList_New(0);
1485 goto done;
1486 }
1487 if (revcount == 1) {
1488 PyObject *obj;
1489 ret = PyList_New(1);
1490 if (ret == NULL)
1491 goto bail;
1492 obj = PyInt_FromLong(revs[0]);
1493 if (obj == NULL)
1494 goto bail;
1495 PyList_SET_ITEM(ret, 0, obj);
1496 goto done;
1497 }
1498
1499 gca = find_gca_candidates(self, revs, revcount);
1500 if (gca == NULL)
1501 goto bail;
1502
1503 if (PyList_GET_SIZE(gca) <= 1) {
1504 ret = gca;
1505 Py_INCREF(gca);
1506 }
1507 else ret = find_deepest(self, gca);
1512
1513 done:
1514 free(revs);
1515 Py_XDECREF(gca);
1516
1517 return ret;
1518
1519 bail:
1520 free(revs);
1521 Py_XDECREF(gca);
1522 Py_XDECREF(ret);
1523 return NULL;
1524 }
1525
1166 1526 /*
1167 1527 * Invalidate any trie entries introduced by added revs.
1168 1528 */
1169 1529 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1170 1530 {
1171 1531 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1172 1532
1173 1533 for (i = start; i < len; i++) {
1174 1534 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1175 1535 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1176 1536
1177 1537 nt_insert(self, PyString_AS_STRING(node), -1);
1178 1538 }
1179 1539
1180 1540 if (start == 0)
1181 1541 Py_CLEAR(self->added);
1182 1542 }
1183 1543
1184 1544 /*
1185 1545 * Delete a numeric range of revs, which must be at the end of the
1186 1546 * range, but exclude the sentinel nullid entry.
1187 1547 */
1188 1548 static int index_slice_del(indexObject *self, PyObject *item)
1189 1549 {
1190 1550 Py_ssize_t start, stop, step, slicelength;
1191 1551 Py_ssize_t length = index_length(self);
1192 1552 int ret = 0;
1193 1553
1194 1554 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1195 1555 &start, &stop, &step, &slicelength) < 0)
1196 1556 return -1;
1197 1557
1198 1558 if (slicelength <= 0)
1199 1559 return 0;
1200 1560
1201 1561 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1202 1562 stop = start;
1203 1563
1204 1564 if (step < 0) {
1205 1565 stop = start + 1;
1206 1566 start = stop + step*(slicelength - 1) - 1;
1207 1567 step = -step;
1208 1568 }
1209 1569
1210 1570 if (step != 1) {
1211 1571 PyErr_SetString(PyExc_ValueError,
1212 1572 "revlog index delete requires step size of 1");
1213 1573 return -1;
1214 1574 }
1215 1575
1216 1576 if (stop != length - 1) {
1217 1577 PyErr_SetString(PyExc_IndexError,
1218 1578 "revlog index deletion indices are invalid");
1219 1579 return -1;
1220 1580 }
1221 1581
1222 1582 if (start < self->length - 1) {
1223 1583 if (self->nt) {
1224 1584 Py_ssize_t i;
1225 1585
1226 1586 for (i = start + 1; i < self->length - 1; i++) {
1227 1587 const char *node = index_node(self, i);
1228 1588
1229 1589 if (node)
1230 1590 nt_insert(self, node, -1);
1231 1591 }
1232 1592 if (self->added)
1233 1593 nt_invalidate_added(self, 0);
1234 1594 if (self->ntrev > start)
1235 1595 self->ntrev = (int)start;
1236 1596 }
1237 1597 self->length = start + 1;
1238 1598 if (start < self->raw_length) {
1239 1599 if (self->cache) {
1240 1600 Py_ssize_t i;
1241 1601 for (i = start; i < self->raw_length; i++)
1242 1602 Py_CLEAR(self->cache[i]);
1243 1603 }
1244 1604 self->raw_length = start;
1245 1605 }
1246 1606 goto done;
1247 1607 }
1248 1608
1249 1609 if (self->nt) {
1250 1610 nt_invalidate_added(self, start - self->length + 1);
1251 1611 if (self->ntrev > start)
1252 1612 self->ntrev = (int)start;
1253 1613 }
1254 1614 if (self->added)
1255 1615 ret = PyList_SetSlice(self->added, start - self->length + 1,
1256 1616 PyList_GET_SIZE(self->added), NULL);
1257 1617 done:
1258 1618 Py_CLEAR(self->headrevs);
1259 1619 return ret;
1260 1620 }
1261 1621
1262 1622 /*
1263 1623 * Supported ops:
1264 1624 *
1265 1625 * slice deletion
1266 1626 * string assignment (extend node->rev mapping)
1267 1627 * string deletion (shrink node->rev mapping)
1268 1628 */
1269 1629 static int index_assign_subscript(indexObject *self, PyObject *item,
1270 1630 PyObject *value)
1271 1631 {
1272 1632 char *node;
1273 1633 Py_ssize_t nodelen;
1274 1634 long rev;
1275 1635
1276 1636 if (PySlice_Check(item) && value == NULL)
1277 1637 return index_slice_del(self, item);
1278 1638
1279 1639 if (node_check(item, &node, &nodelen) == -1)
1280 1640 return -1;
1281 1641
1282 1642 if (value == NULL)
1283 1643 return self->nt ? nt_insert(self, node, -1) : 0;
1284 1644 rev = PyInt_AsLong(value);
1285 1645 if (rev > INT_MAX || rev < 0) {
1286 1646 if (!PyErr_Occurred())
1287 1647 PyErr_SetString(PyExc_ValueError, "rev out of range");
1288 1648 return -1;
1289 1649 }
1290 1650 return nt_insert(self, node, (int)rev);
1291 1651 }
1292 1652
1293 1653 /*
1294 1654 * Find all RevlogNG entries in an index that has inline data. Update
1295 1655 * the optional "offsets" table with those entries.
1296 1656 */
1297 1657 static long inline_scan(indexObject *self, const char **offsets)
1298 1658 {
1299 1659 const char *data = PyString_AS_STRING(self->data);
1300 1660 const char *end = data + PyString_GET_SIZE(self->data);
1301 1661 long incr = v1_hdrsize;
1302 1662 Py_ssize_t len = 0;
1303 1663
1304 1664 while (data + v1_hdrsize <= end) {
1305 1665 uint32_t comp_len;
1306 1666 const char *old_data;
1307 1667 /* 3rd element of header is length of compressed inline data */
1308 1668 comp_len = getbe32(data + 8);
1309 1669 incr = v1_hdrsize + comp_len;
1310 1670 if (incr < v1_hdrsize)
1311 1671 break;
1312 1672 if (offsets)
1313 1673 offsets[len] = data;
1314 1674 len++;
1315 1675 old_data = data;
1316 1676 data += incr;
1317 1677 if (data <= old_data)
1318 1678 break;
1319 1679 }
1320 1680
1321 1681 if (data != end && data + v1_hdrsize != end) {
1322 1682 if (!PyErr_Occurred())
1323 1683 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1324 1684 return -1;
1325 1685 }
1326 1686
1327 1687 return len;
1328 1688 }
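
The walk itself is simple; a Python sketch: in an inlined index each 64-byte header is immediately followed by comp_len bytes of revision data, so the next record starts 64 + comp_len bytes further on:

import struct

def inline_scan_py(data):
    offsets, pos = [], 0
    while pos + 64 <= len(data):
        offsets.append(pos)
        comp_len = struct.unpack('>i', data[pos + 8:pos + 12])[0]
        pos += 64 + comp_len
    return offsets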
1329 1689
1330 1690 static int index_init(indexObject *self, PyObject *args)
1331 1691 {
1332 1692 PyObject *data_obj, *inlined_obj;
1333 1693 Py_ssize_t size;
1334 1694
1335 1695 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1336 1696 return -1;
1337 1697 if (!PyString_Check(data_obj)) {
1338 1698 PyErr_SetString(PyExc_TypeError, "data is not a string");
1339 1699 return -1;
1340 1700 }
1341 1701 size = PyString_GET_SIZE(data_obj);
1342 1702
1343 1703 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1344 1704 self->data = data_obj;
1345 1705 self->cache = NULL;
1346 1706
1347 1707 self->added = NULL;
1348 1708 self->headrevs = NULL;
1349 1709 self->offsets = NULL;
1350 1710 self->nt = NULL;
1351 1711 self->ntlength = self->ntcapacity = 0;
1352 1712 self->ntdepth = self->ntsplits = 0;
1353 1713 self->ntlookups = self->ntmisses = 0;
1354 1714 self->ntrev = -1;
1355 1715 Py_INCREF(self->data);
1356 1716
1357 1717 if (self->inlined) {
1358 1718 long len = inline_scan(self, NULL);
1359 1719 if (len == -1)
1360 1720 goto bail;
1361 1721 self->raw_length = len;
1362 1722 self->length = len + 1;
1363 1723 } else {
1364 1724 if (size % v1_hdrsize) {
1365 1725 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1366 1726 goto bail;
1367 1727 }
1368 1728 self->raw_length = size / v1_hdrsize;
1369 1729 self->length = self->raw_length + 1;
1370 1730 }
1371 1731
1372 1732 return 0;
1373 1733 bail:
1374 1734 return -1;
1375 1735 }
1376 1736
1377 1737 static PyObject *index_nodemap(indexObject *self)
1378 1738 {
1379 1739 Py_INCREF(self);
1380 1740 return (PyObject *)self;
1381 1741 }
1382 1742
1383 1743 static void index_dealloc(indexObject *self)
1384 1744 {
1385 1745 _index_clearcaches(self);
1386 1746 Py_DECREF(self->data);
1387 1747 Py_XDECREF(self->added);
1388 1748 PyObject_Del(self);
1389 1749 }
1390 1750
1391 1751 static PySequenceMethods index_sequence_methods = {
1392 1752 (lenfunc)index_length, /* sq_length */
1393 1753 0, /* sq_concat */
1394 1754 0, /* sq_repeat */
1395 1755 (ssizeargfunc)index_get, /* sq_item */
1396 1756 0, /* sq_slice */
1397 1757 0, /* sq_ass_item */
1398 1758 0, /* sq_ass_slice */
1399 1759 (objobjproc)index_contains, /* sq_contains */
1400 1760 };
1401 1761
1402 1762 static PyMappingMethods index_mapping_methods = {
1403 1763 (lenfunc)index_length, /* mp_length */
1404 1764 (binaryfunc)index_getitem, /* mp_subscript */
1405 1765 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1406 1766 };
1407 1767
1408 1768 static PyMethodDef index_methods[] = {
1769 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1770 "return the gca set of the given revs"},
1409 1771 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1410 1772 "clear the index caches"},
1411 1773 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1412 1774 "get an index entry"},
1413 1775 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1414 1776 "get head revisions"},
1415 1777 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1416 1778 "insert an index entry"},
1417 1779 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1418 1780 "match a potentially ambiguous node ID"},
1419 1781 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1420 1782 "stats for the index"},
1421 1783 {NULL} /* Sentinel */
1422 1784 };
1423 1785
1424 1786 static PyGetSetDef index_getset[] = {
1425 1787 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1426 1788 {NULL} /* Sentinel */
1427 1789 };
1428 1790
1429 1791 static PyTypeObject indexType = {
1430 1792 PyObject_HEAD_INIT(NULL)
1431 1793 0, /* ob_size */
1432 1794 "parsers.index", /* tp_name */
1433 1795 sizeof(indexObject), /* tp_basicsize */
1434 1796 0, /* tp_itemsize */
1435 1797 (destructor)index_dealloc, /* tp_dealloc */
1436 1798 0, /* tp_print */
1437 1799 0, /* tp_getattr */
1438 1800 0, /* tp_setattr */
1439 1801 0, /* tp_compare */
1440 1802 0, /* tp_repr */
1441 1803 0, /* tp_as_number */
1442 1804 &index_sequence_methods, /* tp_as_sequence */
1443 1805 &index_mapping_methods, /* tp_as_mapping */
1444 1806 0, /* tp_hash */
1445 1807 0, /* tp_call */
1446 1808 0, /* tp_str */
1447 1809 0, /* tp_getattro */
1448 1810 0, /* tp_setattro */
1449 1811 0, /* tp_as_buffer */
1450 1812 Py_TPFLAGS_DEFAULT, /* tp_flags */
1451 1813 "revlog index", /* tp_doc */
1452 1814 0, /* tp_traverse */
1453 1815 0, /* tp_clear */
1454 1816 0, /* tp_richcompare */
1455 1817 0, /* tp_weaklistoffset */
1456 1818 0, /* tp_iter */
1457 1819 0, /* tp_iternext */
1458 1820 index_methods, /* tp_methods */
1459 1821 0, /* tp_members */
1460 1822 index_getset, /* tp_getset */
1461 1823 0, /* tp_base */
1462 1824 0, /* tp_dict */
1463 1825 0, /* tp_descr_get */
1464 1826 0, /* tp_descr_set */
1465 1827 0, /* tp_dictoffset */
1466 1828 (initproc)index_init, /* tp_init */
1467 1829 0, /* tp_alloc */
1468 1830 };
1469 1831
1470 1832 /*
1471 1833 * returns a tuple of the form (index, cache) with elements as
1472 1834 * follows:
1473 1835 *
1474 1836 * index: an index object that lazily parses RevlogNG records
1475 1837 * cache: if data is inlined, a tuple (0, index_file_content), else None
1476 1838 *
1477 1839 * added complications are for backwards compatibility
1478 1840 */
1479 1841 static PyObject *parse_index2(PyObject *self, PyObject *args)
1480 1842 {
1481 1843 PyObject *tuple = NULL, *cache = NULL;
1482 1844 indexObject *idx;
1483 1845 int ret;
1484 1846
1485 1847 idx = PyObject_New(indexObject, &indexType);
1486 1848 if (idx == NULL)
1487 1849 goto bail;
1488 1850
1489 1851 ret = index_init(idx, args);
1490 1852 if (ret == -1)
1491 1853 goto bail;
1492 1854
1493 1855 if (idx->inlined) {
1494 1856 cache = Py_BuildValue("iO", 0, idx->data);
1495 1857 if (cache == NULL)
1496 1858 goto bail;
1497 1859 } else {
1498 1860 cache = Py_None;
1499 1861 Py_INCREF(cache);
1500 1862 }
1501 1863
1502 1864 tuple = Py_BuildValue("NN", idx, cache);
1503 1865 if (!tuple)
1504 1866 goto bail;
1505 1867 return tuple;
1506 1868
1507 1869 bail:
1508 1870 Py_XDECREF(idx);
1509 1871 Py_XDECREF(cache);
1510 1872 Py_XDECREF(tuple);
1511 1873 return NULL;
1512 1874 }
1513 1875
1514 1876 static char parsers_doc[] = "Efficient content parsing.";
1515 1877
1516 1878 PyObject *encodedir(PyObject *self, PyObject *args);
1517 1879 PyObject *pathencode(PyObject *self, PyObject *args);
1518 1880 PyObject *lowerencode(PyObject *self, PyObject *args);
1519 1881
1520 1882 static PyMethodDef methods[] = {
1521 1883 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1522 1884 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1523 1885 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1524 1886 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1525 1887 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1526 1888 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1527 1889 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1528 1890 {NULL, NULL}
1529 1891 };
1530 1892
1531 1893 void dirs_module_init(PyObject *mod);
1532 1894
1533 1895 static void module_init(PyObject *mod)
1534 1896 {
1535 1897 dirs_module_init(mod);
1536 1898
1537 1899 indexType.tp_new = PyType_GenericNew;
1538 1900 if (PyType_Ready(&indexType) < 0)
1539 1901 return;
1540 1902 Py_INCREF(&indexType);
1541 1903
1542 1904 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1543 1905
1544 1906 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1545 1907 -1, -1, -1, -1, nullid, 20);
1546 1908 if (nullentry)
1547 1909 PyObject_GC_UnTrack(nullentry);
1548 1910
1549 1911 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1550 1912 }
1551 1913
1552 1914 #ifdef IS_PY3K
1553 1915 static struct PyModuleDef parsers_module = {
1554 1916 PyModuleDef_HEAD_INIT,
1555 1917 "parsers",
1556 1918 parsers_doc,
1557 1919 -1,
1558 1920 methods
1559 1921 };
1560 1922
1561 1923 PyMODINIT_FUNC PyInit_parsers(void)
1562 1924 {
1563 1925 PyObject *mod = PyModule_Create(&parsers_module);
1564 1926 module_init(mod);
1565 1927 return mod;
1566 1928 }
1567 1929 #else
1568 1930 PyMODINIT_FUNC initparsers(void)
1569 1931 {
1570 1932 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1571 1933 module_init(mod);
1572 1934 }
1573 1935 #endif
@@ -1,1345 +1,1340 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parents is null, it is always p2
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
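
Because the parents are sorted before hashing, a node id does not depend on parent order. A standalone sketch of the same construction using hashlib directly (the file itself goes through util.sha1):

    import hashlib

    def node_sketch(text, p1, p2):
        a, b = sorted([p1, p2])    # parent order must not change the hash
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    assert node_sketch("t", "\x01" * 20, "\x02" * 20) == \
           node_sketch("t", "\x02" * 20, "\x01" * 20)
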
78 78 def decompress(bin):
79 79 """ decompress the given input """
80 80 if not bin:
81 81 return bin
82 82 t = bin[0]
83 83 if t == '\0':
84 84 return bin
85 85 if t == 'x':
86 86 try:
87 87 return _decompress(bin)
88 88 except zlib.error, e:
89 89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 90 if t == 'u':
91 91 return bin[1:]
92 92 raise RevlogError(_("unknown compression type %r") % t)
93 93
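
The first byte of a stored chunk selects its encoding, so decompress() can be exercised directly. A small round trip, assuming default zlib settings (whose streams begin with 'x'):

    import zlib

    raw = "some revision text"
    assert decompress(zlib.compress(raw)) == raw  # 'x': a zlib stream
    assert decompress('u' + raw) == raw           # 'u': stored literal
    assert decompress('') == ''                   # empty chunks pass through
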
94 94 # index v0:
95 95 # 4 bytes: offset
96 96 # 4 bytes: compressed length
97 97 # 4 bytes: base rev
98 98 # 4 bytes: link rev
99 99 # 20 bytes: parent 1 nodeid
100 100 # 20 bytes: parent 2 nodeid
101 101 # 20 bytes: nodeid
102 102 indexformatv0 = ">4l20s20s20s"
103 103 v0shaoffset = 56
104 104
105 105 class revlogoldio(object):
106 106 def __init__(self):
107 107 self.size = struct.calcsize(indexformatv0)
108 108
109 109 def parseindex(self, data, inline):
110 110 s = self.size
111 111 index = []
112 112 nodemap = {nullid: nullrev}
113 113 n = off = 0
114 114 l = len(data)
115 115 while off + s <= l:
116 116 cur = data[off:off + s]
117 117 off += s
118 118 e = _unpack(indexformatv0, cur)
119 119 # transform to revlogv1 format
120 120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 122 index.append(e2)
123 123 nodemap[e[6]] = n
124 124 n += 1
125 125
126 126 # add the magic null revision at -1
127 127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128 128
129 129 return index, nodemap, None
130 130
131 131 def packentry(self, entry, node, version, rev):
132 132 if gettype(entry[0]):
133 133 raise RevlogError(_("index entry flags need RevlogNG"))
134 134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 135 node(entry[5]), node(entry[6]), entry[7])
136 136 return _pack(indexformatv0, *e2)
137 137
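
A v0 index record is a fixed 76-byte struct: four big-endian 32-bit integers followed by three raw 20-byte node hashes. A standalone check of the layout documented above:

    import struct

    fmt = ">4l20s20s20s"   # offset, length, base, link, p1, p2, node
    assert struct.calcsize(fmt) == 16 + 3 * 20
    rec = struct.pack(fmt, 0, 0, 0, -1, "\0" * 20, "\0" * 20, "\0" * 20)
    assert struct.unpack(fmt, rec)[3] == -1       # link rev round-trips
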
138 138 # index ng:
139 139 # 6 bytes: offset
140 140 # 2 bytes: flags
141 141 # 4 bytes: compressed length
142 142 # 4 bytes: uncompressed length
143 143 # 4 bytes: base rev
144 144 # 4 bytes: link rev
145 145 # 4 bytes: parent 1 rev
146 146 # 4 bytes: parent 2 rev
147 147 # 32 bytes: nodeid
148 148 indexformatng = ">Qiiiiii20s12x"
149 149 ngshaoffset = 32
150 150 versionformat = ">I"
151 151
152 152 class revlogio(object):
153 153 def __init__(self):
154 154 self.size = struct.calcsize(indexformatng)
155 155
156 156 def parseindex(self, data, inline):
157 157 # call the C implementation to parse the index data
158 158 index, cache = parsers.parse_index2(data, inline)
159 159 return index, getattr(index, 'nodemap', None), cache
160 160
161 161 def packentry(self, entry, node, version, rev):
162 162 p = _pack(indexformatng, *entry)
163 163 if rev == 0:
164 164 p = _pack(versionformat, version) + p[4:]
165 165 return p
166 166
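
For rev 0, packentry() splices the version word over the first four bytes of the record; that is safe because rev 0 always starts at data offset 0, so no information is lost. A hedged standalone sketch of the trick:

    import struct

    fmt, vfmt = ">Qiiiiii20s12x", ">I"
    entry = struct.pack(fmt, 0, 0, 0, 0, 0, -1, -1, "\0" * 20)
    entry0 = struct.pack(vfmt, 1 | (1 << 16)) + entry[4:]  # NG | inline
    assert len(entry0) == struct.calcsize(fmt) == 64
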
167 167 class revlog(object):
168 168 """
169 169 the underlying revision storage object
170 170
171 171 A revlog consists of two parts, an index and the revision data.
172 172
173 173 The index is a file with a fixed record size containing
174 174 information on each revision, including its nodeid (hash), the
175 175 nodeids of its parents, the position and offset of its data within
176 176 the data file, and the revision it's based on. Finally, each entry
177 177 contains a linkrev entry that can serve as a pointer to external
178 178 data.
179 179
180 180 The revision data itself is a linear collection of data chunks.
181 181 Each chunk represents a revision and is usually represented as a
182 182 delta against the previous chunk. To bound lookup time, runs of
183 183 deltas are limited to about 2 times the length of the original
184 184 version data. This makes retrieval of a version proportional to
185 185 its size, or O(1) relative to the number of revisions.
186 186
187 187 Both pieces of the revlog are written to in an append-only
188 188 fashion, which means we never need to rewrite a file to insert or
189 189 remove data, and can use some simple techniques to avoid the need
190 190 for locking while reading.
191 191 """
192 192 def __init__(self, opener, indexfile):
193 193 """
194 194 create a revlog object
195 195
196 196 opener is a function that abstracts the file opening operation
197 197 and can be used to implement COW semantics or the like.
198 198 """
199 199 self.indexfile = indexfile
200 200 self.datafile = indexfile[:-2] + ".d"
201 201 self.opener = opener
202 202 self._cache = None
203 203 self._basecache = (0, 0)
204 204 self._chunkcache = (0, '')
205 205 self.index = []
206 206 self._pcache = {}
207 207 self._nodecache = {nullid: nullrev}
208 208 self._nodepos = None
209 209
210 210 v = REVLOG_DEFAULT_VERSION
211 211 opts = getattr(opener, 'options', None)
212 212 if opts is not None:
213 213 if 'revlogv1' in opts:
214 214 if 'generaldelta' in opts:
215 215 v |= REVLOGGENERALDELTA
216 216 else:
217 217 v = 0
218 218
219 219 i = ''
220 220 self._initempty = True
221 221 try:
222 222 f = self.opener(self.indexfile)
223 223 i = f.read()
224 224 f.close()
225 225 if len(i) > 0:
226 226 v = struct.unpack(versionformat, i[:4])[0]
227 227 self._initempty = False
228 228 except IOError, inst:
229 229 if inst.errno != errno.ENOENT:
230 230 raise
231 231
232 232 self.version = v
233 233 self._inline = v & REVLOGNGINLINEDATA
234 234 self._generaldelta = v & REVLOGGENERALDELTA
235 235 flags = v & ~0xFFFF
236 236 fmt = v & 0xFFFF
237 237 if fmt == REVLOGV0 and flags:
238 238 raise RevlogError(_("index %s unknown flags %#04x for format v0")
239 239 % (self.indexfile, flags >> 16))
240 240 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
241 241 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
242 242 % (self.indexfile, flags >> 16))
243 243 elif fmt > REVLOGNG:
244 244 raise RevlogError(_("index %s unknown format %d")
245 245 % (self.indexfile, fmt))
246 246
247 247 self._io = revlogio()
248 248 if self.version == REVLOGV0:
249 249 self._io = revlogoldio()
250 250 try:
251 251 d = self._io.parseindex(i, self._inline)
252 252 except (ValueError, IndexError):
253 253 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
254 254 self.index, nodemap, self._chunkcache = d
255 255 if nodemap is not None:
256 256 self.nodemap = self._nodecache = nodemap
257 257 if not self._chunkcache:
258 258 self._chunkclear()
259 259
260 260 def tip(self):
261 261 return self.node(len(self.index) - 2)
262 262 def __len__(self):
263 263 return len(self.index) - 1
264 264 def __iter__(self):
265 265 return iter(xrange(len(self)))
266 266 def revs(self, start=0, stop=None):
267 267 """iterate over all revs in this revlog (from start to stop)"""
268 268 step = 1
269 269 if stop is not None:
270 270 if start > stop:
271 271 step = -1
272 272 stop += step
273 273 else:
274 274 stop = len(self)
275 275 return xrange(start, stop, step)
276 276
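
revs() iterates forward or backward depending on how start and stop compare, and includes both endpoints when stop is given. A standalone mirror of the logic (sketch):

    def revs_sketch(n, start=0, stop=None):
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = n
        return xrange(start, stop, step)

    assert list(revs_sketch(10, 2, 5)) == [2, 3, 4, 5]
    assert list(revs_sketch(10, 5, 2)) == [5, 4, 3, 2]
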
277 277 @util.propertycache
278 278 def nodemap(self):
279 279 self.rev(self.node(0))
280 280 return self._nodecache
281 281
282 282 def hasnode(self, node):
283 283 try:
284 284 self.rev(node)
285 285 return True
286 286 except KeyError:
287 287 return False
288 288
289 289 def clearcaches(self):
290 290 try:
291 291 self._nodecache.clearcaches()
292 292 except AttributeError:
293 293 self._nodecache = {nullid: nullrev}
294 294 self._nodepos = None
295 295
296 296 def rev(self, node):
297 297 try:
298 298 return self._nodecache[node]
299 299 except RevlogError:
300 300 # parsers.c radix tree lookup failed
301 301 raise LookupError(node, self.indexfile, _('no node'))
302 302 except KeyError:
303 303 # pure python cache lookup failed
304 304 n = self._nodecache
305 305 i = self.index
306 306 p = self._nodepos
307 307 if p is None:
308 308 p = len(i) - 2
309 309 for r in xrange(p, -1, -1):
310 310 v = i[r][7]
311 311 n[v] = r
312 312 if v == node:
313 313 self._nodepos = r - 1
314 314 return r
315 315 raise LookupError(node, self.indexfile, _('no node'))
316 316
317 317 def node(self, rev):
318 318 return self.index[rev][7]
319 319 def linkrev(self, rev):
320 320 return self.index[rev][4]
321 321 def parents(self, node):
322 322 i = self.index
323 323 d = i[self.rev(node)]
324 324 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
325 325 def parentrevs(self, rev):
326 326 return self.index[rev][5:7]
327 327 def start(self, rev):
328 328 return int(self.index[rev][0] >> 16)
329 329 def end(self, rev):
330 330 return self.start(rev) + self.length(rev)
331 331 def length(self, rev):
332 332 return self.index[rev][1]
333 333 def chainbase(self, rev):
334 334 index = self.index
335 335 base = index[rev][3]
336 336 while base != rev:
337 337 rev = base
338 338 base = index[rev][3]
339 339 return base
340 340 def flags(self, rev):
341 341 return self.index[rev][0] & 0xFFFF
342 342 def rawsize(self, rev):
343 343 """return the length of the uncompressed text for a given revision"""
344 344 l = self.index[rev][2]
345 345 if l >= 0:
346 346 return l
347 347
348 348 t = self.revision(self.node(rev))
349 349 return len(t)
350 350 size = rawsize
351 351
352 352 def ancestors(self, revs, stoprev=0, inclusive=False):
353 353 """Generate the ancestors of 'revs' in reverse topological order.
354 354 Does not generate revs lower than stoprev.
355 355
356 356 See the documentation for ancestor.lazyancestors for more details."""
357 357
358 358 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
359 359 inclusive=inclusive)
360 360
361 361 def descendants(self, revs):
362 362 """Generate the descendants of 'revs' in revision order.
363 363
364 364 Yield a sequence of revision numbers starting with a child of
365 365 some rev in revs, i.e., each revision is *not* considered a
366 366 descendant of itself. Results are ordered by revision number (a
367 367 topological sort)."""
368 368 first = min(revs)
369 369 if first == nullrev:
370 370 for i in self:
371 371 yield i
372 372 return
373 373
374 374 seen = set(revs)
375 375 for i in self.revs(start=first + 1):
376 376 for x in self.parentrevs(i):
377 377 if x != nullrev and x in seen:
378 378 seen.add(i)
379 379 yield i
380 380 break
381 381
382 382 def findcommonmissing(self, common=None, heads=None):
383 383 """Return a tuple of the ancestors of common and the ancestors of heads
384 384 that are not ancestors of common. In revset terminology, we return the
385 385 tuple:
386 386
387 387 ::common, (::heads) - (::common)
388 388
389 389 The list is sorted by revision number, meaning it is
390 390 topologically sorted.
391 391
392 392 'heads' and 'common' are both lists of node IDs. If heads is
393 393 not supplied, uses all of the revlog's heads. If common is not
394 394 supplied, uses nullid."""
395 395 if common is None:
396 396 common = [nullid]
397 397 if heads is None:
398 398 heads = self.heads()
399 399
400 400 common = [self.rev(n) for n in common]
401 401 heads = [self.rev(n) for n in heads]
402 402
403 403 # we want the ancestors, but inclusive
404 404 has = set(self.ancestors(common))
405 405 has.add(nullrev)
406 406 has.update(common)
407 407
408 408 # take all ancestors from heads that aren't in has
409 409 missing = set()
410 410 visit = util.deque(r for r in heads if r not in has)
411 411 while visit:
412 412 r = visit.popleft()
413 413 if r in missing:
414 414 continue
415 415 else:
416 416 missing.add(r)
417 417 for p in self.parentrevs(r):
418 418 if p not in has:
419 419 visit.append(p)
420 420 missing = list(missing)
421 421 missing.sort()
422 422 return has, [self.node(r) for r in missing]
423 423
424 424 def findmissingrevs(self, common=None, heads=None):
425 425 """Return the revision numbers of the ancestors of heads that
426 426 are not ancestors of common.
427 427
428 428 More specifically, return a list of revision numbers corresponding to
429 429 nodes N such that every N satisfies the following constraints:
430 430
431 431 1. N is an ancestor of some node in 'heads'
432 432 2. N is not an ancestor of any node in 'common'
433 433
434 434 The list is sorted by revision number, meaning it is
435 435 topologically sorted.
436 436
437 437 'heads' and 'common' are both lists of revision numbers. If heads is
438 438 not supplied, uses all of the revlog's heads. If common is not
439 439 supplied, uses nullid."""
440 440 if common is None:
441 441 common = [nullrev]
442 442 if heads is None:
443 443 heads = self.headrevs()
444 444
445 445 return ancestor.missingancestors(heads, common, self.parentrevs)
446 446
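
ancestor.missingancestors is the optimized implementation; conceptually the result is (ancestors of heads, inclusive) minus (ancestors of common, inclusive). A naive reference sketch, with -1 standing in for nullrev:

    def missingancestors_sketch(heads, common, pfunc):
        has, visit = set(common), [r for r in common if r != -1]
        while visit:                          # everything below common
            for p in pfunc(visit.pop()):
                if p != -1 and p not in has:
                    has.add(p)
                    visit.append(p)
        missing = set()
        visit = [h for h in heads if h not in has]
        while visit:                          # heads' ancestors not in has
            r = visit.pop()
            if r in missing:
                continue
            missing.add(r)
            for p in pfunc(r):
                if p != -1 and p not in has and p not in missing:
                    visit.append(p)
        return sorted(missing)
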
447 447 def findmissing(self, common=None, heads=None):
448 448 """Return the ancestors of heads that are not ancestors of common.
449 449
450 450 More specifically, return a list of nodes N such that every N
451 451 satisfies the following constraints:
452 452
453 453 1. N is an ancestor of some node in 'heads'
454 454 2. N is not an ancestor of any node in 'common'
455 455
456 456 The list is sorted by revision number, meaning it is
457 457 topologically sorted.
458 458
459 459 'heads' and 'common' are both lists of node IDs. If heads is
460 460 not supplied, uses all of the revlog's heads. If common is not
461 461 supplied, uses nullid."""
462 462 if common is None:
463 463 common = [nullid]
464 464 if heads is None:
465 465 heads = self.heads()
466 466
467 467 common = [self.rev(n) for n in common]
468 468 heads = [self.rev(n) for n in heads]
469 469
470 470 return [self.node(r) for r in
471 471 ancestor.missingancestors(heads, common, self.parentrevs)]
472 472
473 473 def nodesbetween(self, roots=None, heads=None):
474 474 """Return a topological path from 'roots' to 'heads'.
475 475
476 476 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
477 477 topologically sorted list of all nodes N that satisfy both of
478 478 these constraints:
479 479
480 480 1. N is a descendant of some node in 'roots'
481 481 2. N is an ancestor of some node in 'heads'
482 482
483 483 Every node is considered to be both a descendant and an ancestor
484 484 of itself, so every reachable node in 'roots' and 'heads' will be
485 485 included in 'nodes'.
486 486
487 487 'outroots' is the list of reachable nodes in 'roots', i.e., the
488 488 subset of 'roots' that is returned in 'nodes'. Likewise,
489 489 'outheads' is the subset of 'heads' that is also in 'nodes'.
490 490
491 491 'roots' and 'heads' are both lists of node IDs. If 'roots' is
492 492 unspecified, uses nullid as the only root. If 'heads' is
493 493 unspecified, uses list of all of the revlog's heads."""
494 494 nonodes = ([], [], [])
495 495 if roots is not None:
496 496 roots = list(roots)
497 497 if not roots:
498 498 return nonodes
499 499 lowestrev = min([self.rev(n) for n in roots])
500 500 else:
501 501 roots = [nullid] # Everybody's a descendant of nullid
502 502 lowestrev = nullrev
503 503 if (lowestrev == nullrev) and (heads is None):
504 504 # We want _all_ the nodes!
505 505 return ([self.node(r) for r in self], [nullid], list(self.heads()))
506 506 if heads is None:
507 507 # All nodes are ancestors, so the latest ancestor is the last
508 508 # node.
509 509 highestrev = len(self) - 1
510 510 # Set ancestors to None to signal that every node is an ancestor.
511 511 ancestors = None
512 512 # Set heads to an empty dictionary for later discovery of heads
513 513 heads = {}
514 514 else:
515 515 heads = list(heads)
516 516 if not heads:
517 517 return nonodes
518 518 ancestors = set()
519 519 # Turn heads into a dictionary so we can remove 'fake' heads.
520 520 # Also, later we will be using it to filter out the heads we can't
521 521 # find from roots.
522 522 heads = dict.fromkeys(heads, False)
523 523 # Start at the top and keep marking parents until we're done.
524 524 nodestotag = set(heads)
525 525 # Remember where the top was so we can use it as a limit later.
526 526 highestrev = max([self.rev(n) for n in nodestotag])
527 527 while nodestotag:
528 528 # grab a node to tag
529 529 n = nodestotag.pop()
530 530 # Never tag nullid
531 531 if n == nullid:
532 532 continue
533 533 # A node's revision number represents its place in a
534 534 # topologically sorted list of nodes.
535 535 r = self.rev(n)
536 536 if r >= lowestrev:
537 537 if n not in ancestors:
538 538 # If we are possibly a descendant of one of the roots
539 539 # and we haven't already been marked as an ancestor
540 540 ancestors.add(n) # Mark as ancestor
541 541 # Add non-nullid parents to list of nodes to tag.
542 542 nodestotag.update([p for p in self.parents(n) if
543 543 p != nullid])
544 544 elif n in heads: # We've seen it before, is it a fake head?
545 545 # So it is; real heads should not be ancestors of
546 546 # any other heads.
547 547 heads.pop(n)
548 548 if not ancestors:
549 549 return nonodes
550 550 # Now that we have our set of ancestors, we want to remove any
551 551 # roots that are not ancestors.
552 552
553 553 # If one of the roots was nullid, everything is included anyway.
554 554 if lowestrev > nullrev:
555 555 # But, since we weren't, let's recompute the lowest rev to not
556 556 # include roots that aren't ancestors.
557 557
558 558 # Filter out roots that aren't ancestors of heads
559 559 roots = [n for n in roots if n in ancestors]
560 560 # Recompute the lowest revision
561 561 if roots:
562 562 lowestrev = min([self.rev(n) for n in roots])
563 563 else:
564 564 # No more roots? Return empty list
565 565 return nonodes
566 566 else:
567 567 # We are descending from nullid, and don't need to care about
568 568 # any other roots.
569 569 lowestrev = nullrev
570 570 roots = [nullid]
571 571 # Transform our roots list into a set.
572 572 descendants = set(roots)
573 573 # Also, keep the original roots so we can filter out roots that aren't
574 574 # 'real' roots (i.e. are descended from other roots).
575 575 roots = descendants.copy()
576 576 # Our topologically sorted list of output nodes.
577 577 orderedout = []
578 578 # Don't start at nullid since we don't want nullid in our output list,
579 579 # and if nullid shows up in descendants, empty parents will look like
580 580 # they're descendants.
581 581 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
582 582 n = self.node(r)
583 583 isdescendant = False
584 584 if lowestrev == nullrev: # Everybody is a descendant of nullid
585 585 isdescendant = True
586 586 elif n in descendants:
587 587 # n is already a descendant
588 588 isdescendant = True
589 589 # This check only needs to be done here because all the roots
590 590 # will start being marked as descendants before the loop.
591 591 if n in roots:
592 592 # If n was a root, check if it's a 'real' root.
593 593 p = tuple(self.parents(n))
594 594 # If any of its parents are descendants, it's not a root.
595 595 if (p[0] in descendants) or (p[1] in descendants):
596 596 roots.remove(n)
597 597 else:
598 598 p = tuple(self.parents(n))
599 599 # A node is a descendant if either of its parents is a
600 600 # descendant. (We seeded the descendants set with the roots
601 601 # up there, remember?)
602 602 if (p[0] in descendants) or (p[1] in descendants):
603 603 descendants.add(n)
604 604 isdescendant = True
605 605 if isdescendant and ((ancestors is None) or (n in ancestors)):
606 606 # Only include nodes that are both descendants and ancestors.
607 607 orderedout.append(n)
608 608 if (ancestors is not None) and (n in heads):
609 609 # We're trying to figure out which heads are reachable
610 610 # from roots.
611 611 # Mark this head as having been reached
612 612 heads[n] = True
613 613 elif ancestors is None:
614 614 # Otherwise, we're trying to discover the heads.
615 615 # Assume this is a head because if it isn't, the next step
616 616 # will eventually remove it.
617 617 heads[n] = True
618 618 # But, obviously its parents aren't.
619 619 for p in self.parents(n):
620 620 heads.pop(p, None)
621 621 heads = [n for n, flag in heads.iteritems() if flag]
622 622 roots = list(roots)
623 623 assert orderedout
624 624 assert roots
625 625 assert heads
626 626 return (orderedout, roots, heads)
627 627
628 628 def headrevs(self):
629 629 try:
630 630 return self.index.headrevs()
631 631 except AttributeError:
632 632 return self._headrevs()
633 633
634 634 def _headrevs(self):
635 635 count = len(self)
636 636 if not count:
637 637 return [nullrev]
638 638 # we won't iterate over filtered revs, so nobody is a head at the start
639 639 ishead = [0] * (count + 1)
640 640 index = self.index
641 641 for r in self:
642 642 ishead[r] = 1 # I may be a head
643 643 e = index[r]
644 644 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
645 645 return [r for r, val in enumerate(ishead) if val]
646 646
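
Read the other way around, a rev is a head iff no rev names it as a parent; a standalone sketch of _headrevs() over a parentrevs callable:

    def headrevs_sketch(count, parentrevs):
        ishead = [True] * count
        for r in xrange(count):
            for p in parentrevs(r):
                if p >= 0:             # ignore nullrev (-1)
                    ishead[p] = False
        return [r for r in xrange(count) if ishead[r]]
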
647 647 def heads(self, start=None, stop=None):
648 648 """return the list of all nodes that have no children
649 649
650 650 if start is specified, only heads that are descendants of
651 651 start will be returned
652 652 if stop is specified, it will consider all the revs from stop
653 653 as if they had no children
654 654 """
655 655 if start is None and stop is None:
656 656 if not len(self):
657 657 return [nullid]
658 658 return [self.node(r) for r in self.headrevs()]
659 659
660 660 if start is None:
661 661 start = nullid
662 662 if stop is None:
663 663 stop = []
664 664 stoprevs = set([self.rev(n) for n in stop])
665 665 startrev = self.rev(start)
666 666 reachable = set((startrev,))
667 667 heads = set((startrev,))
668 668
669 669 parentrevs = self.parentrevs
670 670 for r in self.revs(start=startrev + 1):
671 671 for p in parentrevs(r):
672 672 if p in reachable:
673 673 if r not in stoprevs:
674 674 reachable.add(r)
675 675 heads.add(r)
676 676 if p in heads and p not in stoprevs:
677 677 heads.remove(p)
678 678
679 679 return [self.node(r) for r in heads]
680 680
681 681 def children(self, node):
682 682 """find the children of a given node"""
683 683 c = []
684 684 p = self.rev(node)
685 685 for r in self.revs(start=p + 1):
686 686 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
687 687 if prevs:
688 688 for pr in prevs:
689 689 if pr == p:
690 690 c.append(self.node(r))
691 691 elif p == nullrev:
692 692 c.append(self.node(r))
693 693 return c
694 694
695 695 def descendant(self, start, end):
696 696 if start == nullrev:
697 697 return True
698 698 for i in self.descendants([start]):
699 699 if i == end:
700 700 return True
701 701 elif i > end:
702 702 break
703 703 return False
704 704
705 705 def ancestor(self, a, b):
706 706 """calculate the least common ancestor of nodes a and b"""
707 707
708 # fast path, check if it is a descendant
709 708 a, b = self.rev(a), self.rev(b)
710 start, end = sorted((a, b))
711 if self.descendant(start, end):
712 return self.node(start)
713
714 def parents(rev):
715 return [p for p in self.parentrevs(rev) if p != nullrev]
716
717 c = ancestor.ancestor(a, b, parents)
718 if c is None:
719 return nullid
720
721 return self.node(c)
709 try:
710 ancs = self.index.ancestors(a, b)
711 except (AttributeError, OverflowError):
712 ancs = ancestor.ancestors(self.parentrevs, a, b)
713 if ancs:
714 # choose a consistent winner when there's a tie
715 return min(map(self.node, ancs))
716 return nullid
722 717
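
When several greatest common ancestors tie (as in a criss-cross merge), taking min() of their binary node ids just makes the answer deterministic; any fixed total order would do. For instance:

    # bytewise comparison of 20-byte node ids picks a stable winner
    assert min(["\xaa" * 20, "\x11" * 20]) == "\x11" * 20
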
723 718 def _match(self, id):
724 719 if isinstance(id, int):
725 720 # rev
726 721 return self.node(id)
727 722 if len(id) == 20:
728 723 # possibly a binary node
729 724 # odds of a binary node being all hex in ASCII are 1 in 10**25
730 725 try:
731 726 node = id
732 727 self.rev(node) # quick search the index
733 728 return node
734 729 except LookupError:
735 730 pass # may be partial hex id
736 731 try:
737 732 # str(rev)
738 733 rev = int(id)
739 734 if str(rev) != id:
740 735 raise ValueError
741 736 if rev < 0:
742 737 rev = len(self) + rev
743 738 if rev < 0 or rev >= len(self):
744 739 raise ValueError
745 740 return self.node(rev)
746 741 except (ValueError, OverflowError):
747 742 pass
748 743 if len(id) == 40:
749 744 try:
750 745 # a full hex nodeid?
751 746 node = bin(id)
752 747 self.rev(node)
753 748 return node
754 749 except (TypeError, LookupError):
755 750 pass
756 751
757 752 def _partialmatch(self, id):
758 753 try:
759 754 return self.index.partialmatch(id)
760 755 except RevlogError:
761 756 # parsers.c radix tree lookup gave multiple matches
762 757 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
763 758 except (AttributeError, ValueError):
764 759 # we are pure python, or key was too short to search radix tree
765 760 pass
766 761
767 762 if id in self._pcache:
768 763 return self._pcache[id]
769 764
770 765 if len(id) < 40:
771 766 try:
772 767 # hex(node)[:...]
773 768 l = len(id) // 2 # grab an even number of digits
774 769 prefix = bin(id[:l * 2])
775 770 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
776 771 nl = [n for n in nl if hex(n).startswith(id)]
777 772 if len(nl) > 0:
778 773 if len(nl) == 1:
779 774 self._pcache[id] = nl[0]
780 775 return nl[0]
781 776 raise LookupError(id, self.indexfile,
782 777 _('ambiguous identifier'))
783 778 return None
784 779 except TypeError:
785 780 pass
786 781
787 782 def lookup(self, id):
788 783 """locate a node based on:
789 784 - revision number or str(revision number)
790 785 - nodeid or subset of hex nodeid
791 786 """
792 787 n = self._match(id)
793 788 if n is not None:
794 789 return n
795 790 n = self._partialmatch(id)
796 791 if n:
797 792 return n
798 793
799 794 raise LookupError(id, self.indexfile, _('no match found'))
800 795
801 796 def cmp(self, node, text):
802 797 """compare text with a given file revision
803 798
804 799 returns True if text is different from what is stored.
805 800 """
806 801 p1, p2 = self.parents(node)
807 802 return hash(text, p1, p2) != node
808 803
809 804 def _addchunk(self, offset, data):
810 805 o, d = self._chunkcache
811 806 # try to add to existing cache
812 807 if o + len(d) == offset and len(d) + len(data) < _chunksize:
813 808 self._chunkcache = o, d + data
814 809 else:
815 810 self._chunkcache = offset, data
816 811
817 812 def _loadchunk(self, offset, length):
818 813 if self._inline:
819 814 df = self.opener(self.indexfile)
820 815 else:
821 816 df = self.opener(self.datafile)
822 817
823 818 readahead = max(65536, length)
824 819 df.seek(offset)
825 820 d = df.read(readahead)
826 821 df.close()
827 822 self._addchunk(offset, d)
828 823 if readahead > length:
829 824 return util.buffer(d, 0, length)
830 825 return d
831 826
832 827 def _getchunk(self, offset, length):
833 828 o, d = self._chunkcache
834 829 l = len(d)
835 830
836 831 # is it in the cache?
837 832 cachestart = offset - o
838 833 cacheend = cachestart + length
839 834 if cachestart >= 0 and cacheend <= l:
840 835 if cachestart == 0 and cacheend == l:
841 836 return d # avoid a copy
842 837 return util.buffer(d, cachestart, cacheend - cachestart)
843 838
844 839 return self._loadchunk(offset, length)
845 840
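
The chunk cache is a single (offset, data) window, and _getchunk() serves a read from it only when the requested byte range falls entirely inside. The containment test as a standalone sketch:

    def incache_sketch(cacheoff, cachedata, offset, length):
        start = offset - cacheoff
        return start >= 0 and start + length <= len(cachedata)

    assert incache_sketch(100, "x" * 50, 120, 10)     # inside the window
    assert not incache_sketch(100, "x" * 50, 90, 10)  # starts before it
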
846 841 def _chunkraw(self, startrev, endrev):
847 842 start = self.start(startrev)
848 843 length = self.end(endrev) - start
849 844 if self._inline:
850 845 start += (startrev + 1) * self._io.size
851 846 return self._getchunk(start, length)
852 847
853 848 def _chunk(self, rev):
854 849 return decompress(self._chunkraw(rev, rev))
855 850
856 851 def _chunkbase(self, rev):
857 852 return self._chunk(rev)
858 853
859 854 def _chunkclear(self):
860 855 self._chunkcache = (0, '')
861 856
862 857 def deltaparent(self, rev):
863 858 """return deltaparent of the given revision"""
864 859 base = self.index[rev][3]
865 860 if base == rev:
866 861 return nullrev
867 862 elif self._generaldelta:
868 863 return base
869 864 else:
870 865 return rev - 1
871 866
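
revision() below rebuilds a text by walking the delta chain back to a full snapshot before patching forward. A standalone mirror of that walk over index tuples, ignoring the single-revision cache (e[3] is the base field):

    def deltachain_sketch(index, rev, generaldelta):
        chain, e = [], index[rev]
        while rev != e[3]:                    # stop at a full snapshot
            chain.append(rev)
            rev = e[3] if generaldelta else rev - 1
            e = index[rev]
        chain.reverse()
        return rev, chain                     # (base rev, deltas to apply)
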
872 867 def revdiff(self, rev1, rev2):
873 868 """return or calculate a delta between two revisions"""
874 869 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
875 870 return str(self._chunk(rev2))
876 871
877 872 return mdiff.textdiff(self.revision(rev1),
878 873 self.revision(rev2))
879 874
880 875 def revision(self, nodeorrev):
881 876 """return an uncompressed revision of a given node or revision
882 877 number.
883 878 """
884 879 if isinstance(nodeorrev, int):
885 880 rev = nodeorrev
886 881 node = self.node(rev)
887 882 else:
888 883 node = nodeorrev
889 884 rev = None
890 885
891 886 cachedrev = None
892 887 if node == nullid:
893 888 return ""
894 889 if self._cache:
895 890 if self._cache[0] == node:
896 891 return self._cache[2]
897 892 cachedrev = self._cache[1]
898 893
899 894 # look up what we need to read
900 895 text = None
901 896 if rev is None:
902 897 rev = self.rev(node)
903 898
904 899 # check rev flags
905 900 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
906 901 raise RevlogError(_('incompatible revision flag %x') %
907 902 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
908 903
909 904 # build delta chain
910 905 chain = []
911 906 index = self.index # for performance
912 907 generaldelta = self._generaldelta
913 908 iterrev = rev
914 909 e = index[iterrev]
915 910 while iterrev != e[3] and iterrev != cachedrev:
916 911 chain.append(iterrev)
917 912 if generaldelta:
918 913 iterrev = e[3]
919 914 else:
920 915 iterrev -= 1
921 916 e = index[iterrev]
922 917 chain.reverse()
923 918 base = iterrev
924 919
925 920 if iterrev == cachedrev:
926 921 # cache hit
927 922 text = self._cache[2]
928 923
929 924 # drop cache to save memory
930 925 self._cache = None
931 926
932 927 self._chunkraw(base, rev)
933 928 if text is None:
934 929 text = str(self._chunkbase(base))
935 930
936 931 bins = [self._chunk(r) for r in chain]
937 932 text = mdiff.patches(text, bins)
938 933
939 934 text = self._checkhash(text, node, rev)
940 935
941 936 self._cache = (node, rev, text)
942 937 return text
943 938
944 939 def _checkhash(self, text, node, rev):
945 940 p1, p2 = self.parents(node)
946 941 if node != hash(text, p1, p2):
947 942 raise RevlogError(_("integrity check failed on %s:%d")
948 943 % (self.indexfile, rev))
949 944 return text
950 945
951 946 def checkinlinesize(self, tr, fp=None):
952 947 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
953 948 return
954 949
955 950 trinfo = tr.find(self.indexfile)
956 951 if trinfo is None:
957 952 raise RevlogError(_("%s not found in the transaction")
958 953 % self.indexfile)
959 954
960 955 trindex = trinfo[2]
961 956 dataoff = self.start(trindex)
962 957
963 958 tr.add(self.datafile, dataoff)
964 959
965 960 if fp:
966 961 fp.flush()
967 962 fp.close()
968 963
969 964 df = self.opener(self.datafile, 'w')
970 965 try:
971 966 for r in self:
972 967 df.write(self._chunkraw(r, r))
973 968 finally:
974 969 df.close()
975 970
976 971 fp = self.opener(self.indexfile, 'w', atomictemp=True)
977 972 self.version &= ~(REVLOGNGINLINEDATA)
978 973 self._inline = False
979 974 for i in self:
980 975 e = self._io.packentry(self.index[i], self.node, self.version, i)
981 976 fp.write(e)
982 977
983 978 # if we don't call close, the temp file will never replace the
984 979 # real index
985 980 fp.close()
986 981
987 982 tr.replace(self.indexfile, trindex * self._io.size)
988 983 self._chunkclear()
989 984
990 985 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
991 986 """add a revision to the log
992 987
993 988 text - the revision data to add
994 989 transaction - the transaction object used for rollback
995 990 link - the linkrev data to add
996 991 p1, p2 - the parent nodeids of the revision
997 992 cachedelta - an optional precomputed delta
998 993 """
999 994 node = hash(text, p1, p2)
1000 995 if node in self.nodemap:
1001 996 return node
1002 997
1003 998 dfh = None
1004 999 if not self._inline:
1005 1000 dfh = self.opener(self.datafile, "a")
1006 1001 ifh = self.opener(self.indexfile, "a+")
1007 1002 try:
1008 1003 return self._addrevision(node, text, transaction, link, p1, p2,
1009 1004 cachedelta, ifh, dfh)
1010 1005 finally:
1011 1006 if dfh:
1012 1007 dfh.close()
1013 1008 ifh.close()
1014 1009
1015 1010 def compress(self, text):
1016 1011 """ generate a possibly-compressed representation of text """
1017 1012 if not text:
1018 1013 return ("", text)
1019 1014 l = len(text)
1020 1015 bin = None
1021 1016 if l < 44:
1022 1017 pass
1023 1018 elif l > 1000000:
1024 1019 # zlib makes an internal copy, thus doubling memory usage for
1025 1020 # large files, so let's do this in pieces
1026 1021 z = zlib.compressobj()
1027 1022 p = []
1028 1023 pos = 0
1029 1024 while pos < l:
1030 1025 pos2 = pos + 2**20
1031 1026 p.append(z.compress(text[pos:pos2]))
1032 1027 pos = pos2
1033 1028 p.append(z.flush())
1034 1029 if sum(map(len, p)) < l:
1035 1030 bin = "".join(p)
1036 1031 else:
1037 1032 bin = _compress(text)
1038 1033 if bin is None or len(bin) > l:
1039 1034 if text[0] == '\0':
1040 1035 return ("", text)
1041 1036 return ('u', text)
1042 1037 return ("", bin)
1043 1038
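
compress() never lets a chunk grow: when zlib does not pay for itself the text is stored literally under a 'u' header (or bare, if it already starts with NUL, which decompress() passes through). A condensed mirror that skips the chunked path for very large texts:

    import zlib

    def compress_sketch(text):
        if not text:
            return ("", text)
        bin = zlib.compress(text) if len(text) >= 44 else None
        if bin is None or len(bin) > len(text):
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

    assert compress_sketch("tiny") == ('u', "tiny")
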
1044 1039 def _addrevision(self, node, text, transaction, link, p1, p2,
1045 1040 cachedelta, ifh, dfh):
1046 1041 """internal function to add revisions to the log
1047 1042
1048 1043 see addrevision for argument descriptions.
1049 1044 invariants:
1050 1045 - text is optional (can be None); if not set, cachedelta must be set.
1051 1046 if both are set, they must correspond to each other.
1052 1047 """
1053 1048 btext = [text]
1054 1049 def buildtext():
1055 1050 if btext[0] is not None:
1056 1051 return btext[0]
1057 1052 # flush any pending writes here so we can read it in revision
1058 1053 if dfh:
1059 1054 dfh.flush()
1060 1055 ifh.flush()
1061 1056 basetext = self.revision(self.node(cachedelta[0]))
1062 1057 btext[0] = mdiff.patch(basetext, cachedelta[1])
1063 1058 chk = hash(btext[0], p1, p2)
1064 1059 if chk != node:
1065 1060 raise RevlogError(_("consistency error in delta"))
1066 1061 return btext[0]
1067 1062
1068 1063 def builddelta(rev):
1069 1064 # can we use the cached delta?
1070 1065 if cachedelta and cachedelta[0] == rev:
1071 1066 delta = cachedelta[1]
1072 1067 else:
1073 1068 t = buildtext()
1074 1069 ptext = self.revision(self.node(rev))
1075 1070 delta = mdiff.textdiff(ptext, t)
1076 1071 data = self.compress(delta)
1077 1072 l = len(data[1]) + len(data[0])
1078 1073 if basecache[0] == rev:
1079 1074 chainbase = basecache[1]
1080 1075 else:
1081 1076 chainbase = self.chainbase(rev)
1082 1077 dist = l + offset - self.start(chainbase)
1083 1078 if self._generaldelta:
1084 1079 base = rev
1085 1080 else:
1086 1081 base = chainbase
1087 1082 return dist, l, data, base, chainbase
1088 1083
1089 1084 curr = len(self)
1090 1085 prev = curr - 1
1091 1086 base = chainbase = curr
1092 1087 offset = self.end(prev)
1093 1088 flags = 0
1094 1089 d = None
1095 1090 basecache = self._basecache
1096 1091 p1r, p2r = self.rev(p1), self.rev(p2)
1097 1092
1098 1093 # should we try to build a delta?
1099 1094 if prev != nullrev:
1100 1095 if self._generaldelta:
1101 1096 if p1r >= basecache[1]:
1102 1097 d = builddelta(p1r)
1103 1098 elif p2r >= basecache[1]:
1104 1099 d = builddelta(p2r)
1105 1100 else:
1106 1101 d = builddelta(prev)
1107 1102 else:
1108 1103 d = builddelta(prev)
1109 1104 dist, l, data, base, chainbase = d
1110 1105
1111 1106 # full versions are inserted when the needed deltas
1112 1107 # become comparable to the uncompressed text
1113 1108 if text is None:
1114 1109 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1115 1110 cachedelta[1])
1116 1111 else:
1117 1112 textlen = len(text)
1118 1113 if d is None or dist > textlen * 2:
1119 1114 text = buildtext()
1120 1115 data = self.compress(text)
1121 1116 l = len(data[1]) + len(data[0])
1122 1117 base = chainbase = curr
1123 1118
1124 1119 e = (offset_type(offset, flags), l, textlen,
1125 1120 base, link, p1r, p2r, node)
1126 1121 self.index.insert(-1, e)
1127 1122 self.nodemap[node] = curr
1128 1123
1129 1124 entry = self._io.packentry(e, self.node, self.version, curr)
1130 1125 if not self._inline:
1131 1126 transaction.add(self.datafile, offset)
1132 1127 transaction.add(self.indexfile, curr * len(entry))
1133 1128 if data[0]:
1134 1129 dfh.write(data[0])
1135 1130 dfh.write(data[1])
1136 1131 dfh.flush()
1137 1132 ifh.write(entry)
1138 1133 else:
1139 1134 offset += curr * self._io.size
1140 1135 transaction.add(self.indexfile, offset, curr)
1141 1136 ifh.write(entry)
1142 1137 ifh.write(data[0])
1143 1138 ifh.write(data[1])
1144 1139 self.checkinlinesize(transaction, ifh)
1145 1140
1146 1141 if type(text) == str: # only accept immutable objects
1147 1142 self._cache = (node, curr, text)
1148 1143 self._basecache = (curr, chainbase)
1149 1144 return node
1150 1145
1151 1146 def group(self, nodelist, bundler, reorder=None):
1152 1147 """Calculate a delta group, yielding a sequence of changegroup chunks
1153 1148 (strings).
1154 1149
1155 1150 Given a list of changeset revs, return a set of deltas and
1156 1151 metadata corresponding to nodes. The first delta is
1157 1152 first parent(nodelist[0]) -> nodelist[0], the receiver is
1158 1153 guaranteed to have this parent as it has all history before
1159 1154 these changesets. If the first parent is nullrev, the
1160 1155 changegroup starts with a full revision.
1161 1156 """
1162 1157
1163 1158 # if we don't have any revisions touched by these changesets, bail
1164 1159 if len(nodelist) == 0:
1165 1160 yield bundler.close()
1166 1161 return
1167 1162
1168 1163 # for generaldelta revlogs, we linearize the revs; this will both be
1169 1164 # much quicker and generate a much smaller bundle
1170 1165 if (self._generaldelta and reorder is not False) or reorder:
1171 1166 dag = dagutil.revlogdag(self)
1172 1167 revs = set(self.rev(n) for n in nodelist)
1173 1168 revs = dag.linearize(revs)
1174 1169 else:
1175 1170 revs = sorted([self.rev(n) for n in nodelist])
1176 1171
1177 1172 # add the parent of the first rev
1178 1173 p = self.parentrevs(revs[0])[0]
1179 1174 revs.insert(0, p)
1180 1175
1181 1176 # build deltas
1182 1177 for r in xrange(len(revs) - 1):
1183 1178 prev, curr = revs[r], revs[r + 1]
1184 1179 for c in bundler.revchunk(self, curr, prev):
1185 1180 yield c
1186 1181
1187 1182 yield bundler.close()
1188 1183
1189 1184 def addgroup(self, bundle, linkmapper, transaction):
1190 1185 """
1191 1186 add a delta group
1192 1187
1193 1188 given a set of deltas, add them to the revision log. the
1194 1189 first delta is against its parent, which should be in our
1195 1190 log, the rest are against the previous delta.
1196 1191 """
1197 1192
1198 1193 # track the base of the current delta log
1199 1194 content = []
1200 1195 node = None
1201 1196
1202 1197 r = len(self)
1203 1198 end = 0
1204 1199 if r:
1205 1200 end = self.end(r - 1)
1206 1201 ifh = self.opener(self.indexfile, "a+")
1207 1202 isize = r * self._io.size
1208 1203 if self._inline:
1209 1204 transaction.add(self.indexfile, end + isize, r)
1210 1205 dfh = None
1211 1206 else:
1212 1207 transaction.add(self.indexfile, isize, r)
1213 1208 transaction.add(self.datafile, end)
1214 1209 dfh = self.opener(self.datafile, "a")
1215 1210
1216 1211 try:
1217 1212 # loop through our set of deltas
1218 1213 chain = None
1219 1214 while True:
1220 1215 chunkdata = bundle.deltachunk(chain)
1221 1216 if not chunkdata:
1222 1217 break
1223 1218 node = chunkdata['node']
1224 1219 p1 = chunkdata['p1']
1225 1220 p2 = chunkdata['p2']
1226 1221 cs = chunkdata['cs']
1227 1222 deltabase = chunkdata['deltabase']
1228 1223 delta = chunkdata['delta']
1229 1224
1230 1225 content.append(node)
1231 1226
1232 1227 link = linkmapper(cs)
1233 1228 if node in self.nodemap:
1234 1229 # this can happen if two branches make the same change
1235 1230 chain = node
1236 1231 continue
1237 1232
1238 1233 for p in (p1, p2):
1239 1234 if p not in self.nodemap:
1240 1235 raise LookupError(p, self.indexfile,
1241 1236 _('unknown parent'))
1242 1237
1243 1238 if deltabase not in self.nodemap:
1244 1239 raise LookupError(deltabase, self.indexfile,
1245 1240 _('unknown delta base'))
1246 1241
1247 1242 baserev = self.rev(deltabase)
1248 1243 chain = self._addrevision(node, None, transaction, link,
1249 1244 p1, p2, (baserev, delta), ifh, dfh)
1250 1245 if not dfh and not self._inline:
1251 1246 # addrevision switched from inline to conventional
1252 1247 # reopen the index
1253 1248 ifh.close()
1254 1249 dfh = self.opener(self.datafile, "a")
1255 1250 ifh = self.opener(self.indexfile, "a")
1256 1251 finally:
1257 1252 if dfh:
1258 1253 dfh.close()
1259 1254 ifh.close()
1260 1255
1261 1256 return content
1262 1257
1263 1258 def strip(self, minlink, transaction):
1264 1259 """truncate the revlog on the first revision with a linkrev >= minlink
1265 1260
1266 1261 This function is called when we're stripping revision minlink and
1267 1262 its descendants from the repository.
1268 1263
1269 1264 We have to remove all revisions with linkrev >= minlink, because
1270 1265 the equivalent changelog revisions will be renumbered after the
1271 1266 strip.
1272 1267
1273 1268 So we truncate the revlog on the first of these revisions, and
1274 1269 trust that the caller has saved the revisions that shouldn't be
1275 1270 removed and that it'll re-add them after this truncation.
1276 1271 """
1277 1272 if len(self) == 0:
1278 1273 return
1279 1274
1280 1275 for rev in self:
1281 1276 if self.index[rev][4] >= minlink:
1282 1277 break
1283 1278 else:
1284 1279 return
1285 1280
1286 1281 # first truncate the files on disk
1287 1282 end = self.start(rev)
1288 1283 if not self._inline:
1289 1284 transaction.add(self.datafile, end)
1290 1285 end = rev * self._io.size
1291 1286 else:
1292 1287 end += rev * self._io.size
1293 1288
1294 1289 transaction.add(self.indexfile, end)
1295 1290
1296 1291 # then reset internal state in memory to forget those revisions
1297 1292 self._cache = None
1298 1293 self._chunkclear()
1299 1294 for x in xrange(rev, len(self)):
1300 1295 del self.nodemap[self.node(x)]
1301 1296
1302 1297 del self.index[rev:-1]
1303 1298
1304 1299 def checksize(self):
1305 1300 expected = 0
1306 1301 if len(self):
1307 1302 expected = max(0, self.end(len(self) - 1))
1308 1303
1309 1304 try:
1310 1305 f = self.opener(self.datafile)
1311 1306 f.seek(0, 2)
1312 1307 actual = f.tell()
1313 1308 f.close()
1314 1309 dd = actual - expected
1315 1310 except IOError, inst:
1316 1311 if inst.errno != errno.ENOENT:
1317 1312 raise
1318 1313 dd = 0
1319 1314
1320 1315 try:
1321 1316 f = self.opener(self.indexfile)
1322 1317 f.seek(0, 2)
1323 1318 actual = f.tell()
1324 1319 f.close()
1325 1320 s = self._io.size
1326 1321 i = max(0, actual // s)
1327 1322 di = actual - (i * s)
1328 1323 if self._inline:
1329 1324 databytes = 0
1330 1325 for r in self:
1331 1326 databytes += max(0, self.length(r))
1332 1327 dd = 0
1333 1328 di = actual - len(self) * s - databytes
1334 1329 except IOError, inst:
1335 1330 if inst.errno != errno.ENOENT:
1336 1331 raise
1337 1332 di = 0
1338 1333
1339 1334 return (dd, di)
1340 1335
1341 1336 def files(self):
1342 1337 res = [self.indexfile]
1343 1338 if not self._inline:
1344 1339 res.append(self.datafile)
1345 1340 return res