py3: use bytearray() instead of array('c', ...) constructions...
Augie Fackler
r31346:2a18e9e6 default
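For context, a minimal sketch of the idiom this changeset adopts: array.array('c') only exists on Python 2, while bytearray provides the same mutable byte buffer on both Python 2 and 3. The record format mirrors _rbcrecfmt = '>4sI' from branchmap.py below; the node prefix and index values here are invented for illustration.

    import struct

    _rbcrecfmt = '>4sI'  # 4-byte node prefix + 31-bit branch index with close flag

    # Old py2-only style:    buf = array.array('c'); buf.fromstring(data)
    # Portable replacement:  buf = bytearray();      buf[:] = data
    buf = bytearray(2 * struct.calcsize(_rbcrecfmt))  # zero-filled, like _clear()

    # Splice a packed record into place, as _setcachedata() now does
    rec = bytearray(struct.pack(_rbcrecfmt, b'\xca\xfe\xba\xbe', 5 | 0x80000000))
    buf[0:len(rec)] = rec

    node, branchidx = struct.unpack(_rbcrecfmt, bytes(buf[0:8]))
    assert node == b'\xca\xfe\xba\xbe'
    assert branchidx & 0x80000000       # close flag set
    assert branchidx & 0x7fffffff == 5  # branch index recovered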
--- a/mercurial/branchmap.py
+++ b/mercurial/branchmap.py
@@ -1,522 +1,520 @@
# branchmap.py - logic to compute, maintain and store the branchmap for a local repo
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import array
import struct

from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from . import (
    encoding,
    error,
    scmutil,
    util,
)

array = array.array
calcsize = struct.calcsize
pack = struct.pack
unpack = struct.unpack

def _filename(repo):
    """name of a branchcache file for a given repo or repoview"""
    filename = "cache/branch2"
    if repo.filtername:
        filename = '%s-%s' % (filename, repo.filtername)
    return filename

def read(repo):
    try:
        f = repo.vfs(_filename(repo))
        lines = f.read().split('\n')
        f.close()
    except (IOError, OSError):
        return None

    try:
        cachekey = lines.pop(0).split(" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        partial = branchcache(tipnode=last, tiprev=lrev,
                              filteredhash=filteredhash)
        if not partial.validfor(repo):
            # invalidate the cache
            raise ValueError('tip differs')
        cl = repo.changelog
        for l in lines:
            if not l:
                continue
            node, state, label = l.split(" ", 2)
            if state not in 'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            if not cl.hasnode(node):
                raise ValueError('node %s does not exist' % hex(node))
            partial.setdefault(label, []).append(node)
            if state == 'c':
                partial._closednodes.add(node)
    except KeyboardInterrupt:
        raise
    except Exception as inst:
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % inst)
        partial = None
    return partial

### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
subsettable = {None: 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}

def updatecache(repo):
    cl = repo.changelog
    filtername = repo.filtername
    partial = repo._branchcaches.get(filtername)

    revs = []
    if partial is None or not partial.validfor(repo):
        partial = read(repo)
        if partial is None:
            subsetname = subsettable.get(filtername)
            if subsetname is None:
                partial = branchcache()
            else:
                subset = repo.filtered(subsetname)
                partial = subset.branchmap().copy()
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= partial.tiprev)
    revs.extend(cl.revs(start=partial.tiprev + 1))
    if revs:
        partial.update(repo, revs)
        partial.write(repo)

    assert partial.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = partial

def replacecache(repo, bm):
    """Replace the branchmap cache for a repo with a branch mapping.

    This is likely only called during clone with a branch map from a remote.
    """
    rbheads = []
    closed = []
    for bheads in bm.itervalues():
        rbheads.extend(bheads)
        for h in bheads:
            r = repo.changelog.rev(h)
            b, c = repo.changelog.branchinfo(r)
            if c:
                closed.append(h)

    if rbheads:
        rtiprev = max((int(repo.changelog.rev(node))
                       for node in rbheads))
        cache = branchcache(bm,
                            repo[rtiprev].node(),
                            rtiprev,
                            closednodes=closed)

        # Try to stick it as low as possible
        # filters above served are unlikely to be fetched from a clone
        for candidate in ('base', 'immutable', 'served'):
            rview = repo.filtered(candidate)
            if cache.validfor(rview):
                repo._branchcaches[candidate] = cache
                cache.write(rview)
                break

class branchcache(dict):
    """A dict-like object that holds the branch heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        super(branchcache, self).__init__(entries)
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == \
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            return False

    def _branchtip(self, heads):
        '''Return a tuple of the last open head in heads and False,
        otherwise return the last closed head and True.'''
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def branchheads(self, branch, closed=False):
        heads = self[branch]
        if not closed:
            heads = [h for h in heads if h not in self._closednodes]
        return heads

    def iterbranches(self):
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return a deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        try:
            f = repo.vfs(_filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), str(self.tiprev)]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            repo.ui.debug("couldn't write branch cache: %s\n" % inst)
            # Abort may be raised by a read-only opener
            pass

    def update(self, repo, revgen):
260 """Given a branchhead cache, self, that may have extra nodes or be
260 """Given a branchhead cache, self, that may have extra nodes or be
261 missing heads, and a generator of nodes that are strictly a superset of
261 missing heads, and a generator of nodes that are strictly a superset of
262 heads missing, this function updates self to be correct.
262 heads missing, this function updates self to be correct.
263 """
263 """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This has been tested True on all internal usages of this function.
            # Run it again in case of doubt.
            # assert not (set(bheadrevs) & set(newheadrevs))
            newheadrevs.sort()
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # the cache key is no longer valid
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)

# Revision branch info cache

_rbcversion = '-v1'
_rbcnames = 'cache/rbc-names' + _rbcversion
_rbcrevs = 'cache/rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcnodelen = 4
_rbcbranchidxmask = 0x7fffffff
_rbccloseflag = 0x80000000

class revbranchcache(object):
    """Persistent cache, mapping from revision number to branch name and
    close flag. This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contain the index of the
    branch and the last bit indicates that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
-        self._rbcrevs = array('c') # structs of type _rbcrecfmt
+        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.vfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                data = repo.vfs.read(_rbcrevs)
-                self._rbcrevs.fromstring(data)
+                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              inst)
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

    def _clear(self):
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._namesreverse.clear()
        self._rbcrevslen = len(self._repo.changelog)
-        self._rbcrevs = array('c')
-        self._rbcrevs.fromstring('\0' * (self._rbcrevslen * _rbcrecsize))
+        self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack(
            _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug("referenced branch names not found"
                    " - rebuilding revision branch cache from scratch\n")
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug("history modification detected - truncating "
                                "revision branch cache to revision %s\n" % rev)
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        rbcrevidx = rev * _rbcrecsize
-        rec = array('c')
-        rec.fromstring(pack(_rbcrecfmt, node, branchidx))
+        rec = bytearray(pack(_rbcrecfmt, node, branchidx))
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            self._rbcrevs.extend('\0' *
                                 (len(self._repo.changelog) * _rbcrecsize -
                                  len(self._rbcrevs)))
        self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize('write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = ''
        try:
            if self._rbcnamescount < len(self._names):
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    f = repo.vfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.vfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.vfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
                self._rbcnamescount = len(self._names)

            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.vfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        start = 0
                        f.seek(start)
                        f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, inst))
        finally:
            if wlock is not None:
                wlock.release()
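To make the cache/branch2 layout described in the branchcache docstring above concrete, here is a hedged parsing sketch; the hashes are invented for illustration and do not refer to real changesets.

    # Format (from the docstring above):
    #   <tip hex node> <tip rev number> [optional filtered repo hex hash]
    #   <branch head hex node> <open/closed state> <branch name>
    data = (
        "1450aa0cbc7feae1829a93398a4a97e938e8b4eb 5\n"
        "2bf57f5a8f1d4d86ed9a8ca797e9cd8e7a88bcf4 o default\n"
        "7d2fe6e0d209f74a99b17af49bfd2dda4a89d09d c stable\n"
    )
    lines = data.splitlines()
    tipnode, tiprev = lines.pop(0).split(" ", 2)[:2]
    heads = {}
    closed = set()   # mirrors branchcache._closednodes
    for line in lines:
        node, state, label = line.split(" ", 2)
        assert state in 'oc'  # read() rejects anything else
        heads.setdefault(label, []).append(node)
        if state == 'c':
            closed.add(node)
    assert int(tiprev) == 5 and sorted(heads) == ['default', 'stable']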
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -1,557 +1,557 @@
# bundlerepo.py - repository class for viewing uncompressed bundles
#
# Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Repository class for viewing uncompressed bundles.

This provides a read-only repository interface to bundles as if they
were part of the actual repository.
"""

from __future__ import absolute_import

import os
import shutil
import tempfile

from .i18n import _
from .node import nullid

from . import (
    bundle2,
    changegroup,
    changelog,
    cmdutil,
    discovery,
    error,
    exchange,
    filelog,
    localrepo,
    manifest,
    mdiff,
    node as nodemod,
    pathutil,
    phases,
    pycompat,
    revlog,
    util,
    vfs as vfsmod,
)

class bundlerevlog(revlog.revlog):
    def __init__(self, opener, indexfile, bundle, linkmapper):
        # How it works:
        # To retrieve a revision, we need to know the offset of the revision in
        # the bundle (an unbundle object). We store this offset in the index
        # (start). The base of the delta is stored in the base field.
        #
        # To differentiate a rev in the bundle from a rev in the revlog, we
        # check revision against repotiprev.
        opener = vfsmod.readonlyvfs(opener)
        revlog.revlog.__init__(self, opener, indexfile)
        self.bundle = bundle
        n = len(self)
        self.repotiprev = n - 1
        chain = None
        self.bundlerevs = set() # used by 'bundle()' revset expression
        getchunk = lambda: bundle.deltachunk(chain)
        for chunkdata in iter(getchunk, {}):
            node = chunkdata['node']
            p1 = chunkdata['p1']
            p2 = chunkdata['p2']
            cs = chunkdata['cs']
            deltabase = chunkdata['deltabase']
            delta = chunkdata['delta']

            size = len(delta)
            start = bundle.tell() - size

            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                chain = node
                self.bundlerevs.add(self.nodemap[node])
                continue

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise error.LookupError(p, self.indexfile,
                                            _("unknown parent"))

            if deltabase not in self.nodemap:
                raise LookupError(deltabase, self.indexfile,
                                  _('unknown delta base'))

            baserev = self.rev(deltabase)
            # start, size, full unc. size, base (unused), link, p1, p2, node
            e = (revlog.offset_type(start, 0), size, -1, baserev, link,
                 self.rev(p1), self.rev(p2), node)
            self.index.insert(-1, e)
            self.nodemap[node] = n
            self.bundlerevs.add(n)
            chain = node
            n += 1

    def _chunk(self, rev):
        # Warning: in case of bundle, the diff is against what we stored as
        # delta base, not against rev - 1
        # XXX: could use some caching
        if rev <= self.repotiprev:
            return revlog.revlog._chunk(self, rev)
        self.bundle.seek(self.start(rev))
        return self.bundle.read(self.length(rev))

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 > self.repotiprev and rev2 > self.repotiprev:
            # hot path for bundle
            revb = self.index[rev2][3]
            if revb == rev1:
                return self._chunk(rev2)
        elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
            return revlog.revlog.revdiff(self, rev1, rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, nodeorrev, raw=False):
        """return an uncompressed revision of a given node or revision
        number.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = self.rev(node)

        if node == nullid:
            return ""

        text = None
        chain = []
        iterrev = rev
        # reconstruct the revision if it is from a changegroup
        while iterrev > self.repotiprev:
            if self._cache and self._cache[1] == iterrev:
                text = self._cache[2]
                break
            chain.append(iterrev)
            iterrev = self.index[iterrev][3]
        if text is None:
            text = self.baserevision(iterrev)

        while chain:
            delta = self._chunk(chain.pop())
            text = mdiff.patches(text, [delta])

        text, validatehash = self._processflags(text, self.flags(rev),
                                                'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)
        self._cache = (node, rev, text)
        return text

    def baserevision(self, nodeorrev):
        # Revlog subclasses may override 'revision' method to modify format of
        # content retrieved from revlog. To use bundlerevlog with such class one
        # needs to override 'baserevision' and make more specific call here.
        return revlog.revlog.revision(self, nodeorrev)

    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        raise NotImplementedError
    def addgroup(self, revs, linkmapper, transaction):
        raise NotImplementedError
    def strip(self, rev, minlink):
        raise NotImplementedError
    def checksize(self):
        raise NotImplementedError

class bundlechangelog(bundlerevlog, changelog.changelog):
    def __init__(self, opener, bundle):
        changelog.changelog.__init__(self, opener)
        linkmapper = lambda x: x
        bundlerevlog.__init__(self, opener, self.indexfile, bundle,
                              linkmapper)

    def baserevision(self, nodeorrev):
        # Although changelog doesn't override 'revision' method, some extensions
        # may replace this class with another that does. Same story with
        # manifest and filelog classes.

        # This bypasses filtering on changelog.node() and rev() because we need
        # revision text of the bundle base even if it is hidden.
        oldfilter = self.filteredrevs
        try:
            self.filteredrevs = ()
            return changelog.changelog.revision(self, nodeorrev)
        finally:
            self.filteredrevs = oldfilter

class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
    def __init__(self, opener, bundle, linkmapper, dirlogstarts=None, dir=''):
        manifest.manifestrevlog.__init__(self, opener, dir=dir)
        bundlerevlog.__init__(self, opener, self.indexfile, bundle,
                              linkmapper)
        if dirlogstarts is None:
            dirlogstarts = {}
            if self.bundle.version == "03":
                dirlogstarts = _getfilestarts(self.bundle)
        self._dirlogstarts = dirlogstarts
        self._linkmapper = linkmapper

    def baserevision(self, nodeorrev):
        node = nodeorrev
        if isinstance(node, int):
            node = self.node(node)

        if node in self.fulltextcache:
-            result = self.fulltextcache[node].tostring()
+            result = '%s' % self.fulltextcache[node]
        else:
            result = manifest.manifestrevlog.revision(self, nodeorrev)
        return result

    def dirlog(self, d):
        if d in self._dirlogstarts:
            self.bundle.seek(self._dirlogstarts[d])
            return bundlemanifest(
                self.opener, self.bundle, self._linkmapper,
                self._dirlogstarts, dir=d)
        return super(bundlemanifest, self).dirlog(d)

class bundlefilelog(bundlerevlog, filelog.filelog):
    def __init__(self, opener, path, bundle, linkmapper):
        filelog.filelog.__init__(self, opener, path)
        bundlerevlog.__init__(self, opener, self.indexfile, bundle,
                              linkmapper)

    def baserevision(self, nodeorrev):
        return filelog.filelog.revision(self, nodeorrev)

class bundlepeer(localrepo.localpeer):
    def canpush(self):
        return False

class bundlephasecache(phases.phasecache):
    def __init__(self, *args, **kwargs):
        super(bundlephasecache, self).__init__(*args, **kwargs)
        if util.safehasattr(self, 'opener'):
            self.opener = vfsmod.readonlyvfs(self.opener)

    def write(self):
        raise NotImplementedError

    def _write(self, fp):
        raise NotImplementedError

    def _updateroots(self, phase, newroots, tr):
        self.phaseroots[phase] = newroots
        self.invalidate()
        self.dirty = True

def _getfilestarts(bundle):
    bundlefilespos = {}
    for chunkdata in iter(bundle.filelogheader, {}):
        fname = chunkdata['filename']
        bundlefilespos[fname] = bundle.tell()
        for chunk in iter(lambda: bundle.deltachunk(None), {}):
            pass
    return bundlefilespos

class bundlerepository(localrepo.localrepository):
    def __init__(self, ui, path, bundlename):
        def _writetempbundle(read, suffix, header=''):
267 """Write a temporary file to disk
267 """Write a temporary file to disk
268
268
269 This is closure because we need to make sure this tracked by
269 This is closure because we need to make sure this tracked by
270 self.tempfile for cleanup purposes."""
270 self.tempfile for cleanup purposes."""
            fdtemp, temp = self.vfs.mkstemp(prefix="hg-bundle-",
                                            suffix=".hg10un")
            self.tempfile = temp

            with os.fdopen(fdtemp, pycompat.sysstr('wb')) as fptemp:
                fptemp.write(header)
                while True:
                    chunk = read(2**18)
                    if not chunk:
                        break
                    fptemp.write(chunk)

            return self.vfs.open(self.tempfile, mode="rb")
        self._tempparent = None
        try:
            localrepo.localrepository.__init__(self, ui, path)
        except error.RepoError:
            self._tempparent = tempfile.mkdtemp()
            localrepo.instance(ui, self._tempparent, 1)
            localrepo.localrepository.__init__(self, ui, self._tempparent)
        self.ui.setconfig('phases', 'publish', False, 'bundlerepo')

        if path:
            self._url = 'bundle:' + util.expandpath(path) + '+' + bundlename
        else:
            self._url = 'bundle:' + bundlename

        self.tempfile = None
        f = util.posixfile(bundlename, "rb")
        self.bundlefile = self.bundle = exchange.readbundle(ui, f, bundlename)

        if isinstance(self.bundle, bundle2.unbundle20):
            cgstream = None
            for part in self.bundle.iterparts():
                if part.type == 'changegroup':
                    if cgstream is not None:
                        raise NotImplementedError("can't process "
                                                  "multiple changegroups")
                    cgstream = part
                    version = part.params.get('version', '01')
                    legalcgvers = changegroup.supportedincomingversions(self)
                    if version not in legalcgvers:
                        msg = _('Unsupported changegroup version: %s')
                        raise error.Abort(msg % version)
                    if self.bundle.compressed():
                        cgstream = _writetempbundle(part.read,
                                                    ".cg%sun" % version)

            if cgstream is None:
                raise error.Abort(_('No changegroups found'))
            cgstream.seek(0)

            self.bundle = changegroup.getunbundler(version, cgstream, 'UN')

        elif self.bundle.compressed():
            f = _writetempbundle(self.bundle.read, '.hg10un', header='HG10UN')
            self.bundlefile = self.bundle = exchange.readbundle(ui, f,
                                                                bundlename,
                                                                self.vfs)

        # dict with the mapping 'filename' -> position in the bundle
        self.bundlefilespos = {}

        self.firstnewrev = self.changelog.repotiprev + 1
        phases.retractboundary(self, None, phases.draft,
                               [ctx.node() for ctx in self[self.firstnewrev:]])

    @localrepo.unfilteredpropertycache
    def _phasecache(self):
        return bundlephasecache(self, self._phasedefaults)

    @localrepo.unfilteredpropertycache
    def changelog(self):
        # consume the header if it exists
        self.bundle.changelogheader()
        c = bundlechangelog(self.svfs, self.bundle)
        self.manstart = self.bundle.tell()
        return c

    def _constructmanifest(self):
        self.bundle.seek(self.manstart)
        # consume the header if it exists
        self.bundle.manifestheader()
        linkmapper = self.unfiltered().changelog.rev
        m = bundlemanifest(self.svfs, self.bundle, linkmapper)
        self.filestart = self.bundle.tell()
        return m

    @localrepo.unfilteredpropertycache
    def manstart(self):
        self.changelog
        return self.manstart

    @localrepo.unfilteredpropertycache
    def filestart(self):
        self.manifestlog
        return self.filestart

    def url(self):
        return self._url

    def file(self, f):
        if not self.bundlefilespos:
            self.bundle.seek(self.filestart)
            self.bundlefilespos = _getfilestarts(self.bundle)

        if f in self.bundlefilespos:
            self.bundle.seek(self.bundlefilespos[f])
            linkmapper = self.unfiltered().changelog.rev
            return bundlefilelog(self.svfs, f, self.bundle, linkmapper)
        else:
            return filelog.filelog(self.svfs, f)

    def close(self):
        """Close assigned bundle file immediately."""
        self.bundlefile.close()
        if self.tempfile is not None:
            self.vfs.unlink(self.tempfile)
        if self._tempparent:
            shutil.rmtree(self._tempparent, True)

    def cancopy(self):
        return False

    def peer(self):
        return bundlepeer(self)

    def getcwd(self):
        return pycompat.getcwd() # always outside the repo

    # Check if parents exist in localrepo before setting
    def setparents(self, p1, p2=nullid):
        p1rev = self.changelog.rev(p1)
        p2rev = self.changelog.rev(p2)
        msg = _("setting parent to node %s that only exists in the bundle\n")
        if self.changelog.repotiprev < p1rev:
406 if self.changelog.repotiprev < p1rev:
407 self.ui.warn(msg % nodemod.hex(p1))
407 self.ui.warn(msg % nodemod.hex(p1))
408 if self.changelog.repotiprev < p2rev:
408 if self.changelog.repotiprev < p2rev:
409 self.ui.warn(msg % nodemod.hex(p2))
409 self.ui.warn(msg % nodemod.hex(p2))
410 return super(bundlerepository, self).setparents(p1, p2)
410 return super(bundlerepository, self).setparents(p1, p2)
411
411
412 def instance(ui, path, create):
412 def instance(ui, path, create):
413 if create:
413 if create:
414 raise error.Abort(_('cannot create new bundle repository'))
414 raise error.Abort(_('cannot create new bundle repository'))
415 # internal config: bundle.mainreporoot
415 # internal config: bundle.mainreporoot
416 parentpath = ui.config("bundle", "mainreporoot", "")
416 parentpath = ui.config("bundle", "mainreporoot", "")
417 if not parentpath:
417 if not parentpath:
418 # try to find the correct path to the working directory repo
418 # try to find the correct path to the working directory repo
419 parentpath = cmdutil.findrepo(pycompat.getcwd())
419 parentpath = cmdutil.findrepo(pycompat.getcwd())
420 if parentpath is None:
420 if parentpath is None:
421 parentpath = ''
421 parentpath = ''
422 if parentpath:
422 if parentpath:
423 # Try to make the full path relative so we get a nice, short URL.
423 # Try to make the full path relative so we get a nice, short URL.
424 # In particular, we don't want temp dir names in test outputs.
424 # In particular, we don't want temp dir names in test outputs.
425 cwd = pycompat.getcwd()
425 cwd = pycompat.getcwd()
426 if parentpath == cwd:
426 if parentpath == cwd:
427 parentpath = ''
427 parentpath = ''
428 else:
428 else:
429 cwd = pathutil.normasprefix(cwd)
429 cwd = pathutil.normasprefix(cwd)
430 if parentpath.startswith(cwd):
430 if parentpath.startswith(cwd):
431 parentpath = parentpath[len(cwd):]
431 parentpath = parentpath[len(cwd):]
432 u = util.url(path)
432 u = util.url(path)
433 path = u.localpath()
433 path = u.localpath()
434 if u.scheme == 'bundle':
434 if u.scheme == 'bundle':
435 s = path.split("+", 1)
435 s = path.split("+", 1)
436 if len(s) == 1:
436 if len(s) == 1:
437 repopath, bundlename = parentpath, s[0]
437 repopath, bundlename = parentpath, s[0]
438 else:
438 else:
439 repopath, bundlename = s
439 repopath, bundlename = s
440 else:
440 else:
441 repopath, bundlename = parentpath, path
441 repopath, bundlename = parentpath, path
442 return bundlerepository(ui, repopath, bundlename)
442 return bundlerepository(ui, repopath, bundlename)
443
443
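A hypothetical summary of the URL shapes the parsing in instance() accepts; the paths are invented for illustration:

# Sketch only; these forms follow from the u.scheme/'+' handling above.
#   bundle:/repo/root+/tmp/incoming.hg   -> explicit parent repo and bundle
#   bundle:/tmp/incoming.hg              -> parent repo discovered from cwd
#   /tmp/incoming.hg                     -> plain bundle path, no scheme
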
444 class bundletransactionmanager(object):
445     def transaction(self):
446         return None
447
448     def close(self):
449         raise NotImplementedError
450
451     def release(self):
452         raise NotImplementedError
453
454 def getremotechanges(ui, repo, other, onlyheads=None, bundlename=None,
455                      force=False):
456     '''obtains a bundle of changes incoming from other
457
458     "onlyheads" restricts the returned changes to those reachable from the
459     specified heads.
460     "bundlename", if given, stores the bundle to this file path permanently;
461     otherwise it's stored to a temp file and gets deleted again when you call
462     the returned "cleanupfn".
463     "force" indicates whether to proceed on unrelated repos.
464
465     Returns a tuple (local, csets, cleanupfn):
466
467     "local" is a local repo from which to obtain the actual incoming
468     changesets; it is a bundlerepo for the obtained bundle when the
469     original "other" is remote.
470     "csets" lists the incoming changeset node ids.
471     "cleanupfn" must be called without arguments when you're done processing
472     the changes; it closes both the original "other" and the one returned
473     here.
474     '''
475     tmp = discovery.findcommonincoming(repo, other, heads=onlyheads,
476                                        force=force)
477     common, incoming, rheads = tmp
478     if not incoming:
479         try:
480             if bundlename:
481                 os.unlink(bundlename)
482         except OSError:
483             pass
484         return repo, [], other.close
485
486     commonset = set(common)
487     rheads = [x for x in rheads if x not in commonset]
488
489     bundle = None
490     bundlerepo = None
491     localrepo = other.local()
492     if bundlename or not localrepo:
493         # create a bundle (uncompressed if other repo is not local)
494
495         # developer config: devel.legacy.exchange
496         legexc = ui.configlist('devel', 'legacy.exchange')
497         forcebundle1 = 'bundle2' not in legexc and 'bundle1' in legexc
498         canbundle2 = (not forcebundle1
499                       and other.capable('getbundle')
500                       and other.capable('bundle2'))
501         if canbundle2:
502             kwargs = {}
503             kwargs['common'] = common
504             kwargs['heads'] = rheads
505             kwargs['bundlecaps'] = exchange.caps20to10(repo)
506             kwargs['cg'] = True
507             b2 = other.getbundle('incoming', **kwargs)
508             fname = bundle = changegroup.writechunks(ui, b2._forwardchunks(),
509                                                      bundlename)
510         else:
511             if other.capable('getbundle'):
512                 cg = other.getbundle('incoming', common=common, heads=rheads)
513             elif onlyheads is None and not other.capable('changegroupsubset'):
514                 # compat with older servers when pulling all remote heads
515                 cg = other.changegroup(incoming, "incoming")
516                 rheads = None
517             else:
518                 cg = other.changegroupsubset(incoming, rheads, 'incoming')
519             if localrepo:
520                 bundletype = "HG10BZ"
521             else:
522                 bundletype = "HG10UN"
523             fname = bundle = bundle2.writebundle(ui, cg, bundlename,
524                                                  bundletype)
525         # keep written bundle?
526         if bundlename:
527             bundle = None
528         if not localrepo:
529             # use the created uncompressed bundlerepo
530             localrepo = bundlerepo = bundlerepository(repo.baseui, repo.root,
531                                                       fname)
532             # this repo contains local and other now, so filter out local again
533             common = repo.heads()
534     if localrepo:
535         # Part of common may be remotely filtered
536         # So use an unfiltered version
537         # The discovery process probably needs cleanup to avoid that
538         localrepo = localrepo.unfiltered()
539
540     csets = localrepo.changelog.findmissing(common, rheads)
541
542     if bundlerepo:
543         reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev:]]
544         remotephases = other.listkeys('phases')
545
546         pullop = exchange.pulloperation(bundlerepo, other, heads=reponodes)
547         pullop.trmanager = bundletransactionmanager()
548         exchange._pullapplyphases(pullop, remotephases)
549
550     def cleanup():
551         if bundlerepo:
552             bundlerepo.close()
553         if bundle:
554             os.unlink(bundle)
555         other.close()
556
557     return (localrepo, csets, cleanup)
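A minimal usage sketch of getremotechanges(); the peer construction via mercurial.hg and the URL are assumptions for illustration, not part of this file:

from mercurial import hg

def showincoming(ui, repo):
    # hypothetical remote; hg.peer opens a peer for a URL
    other = hg.peer(repo, {}, 'http://example.com/repo')
    local, csets, cleanup = getremotechanges(ui, repo, other)
    try:
        for node in csets:
            ui.write('%s\n' % local[node].description())
    finally:
        cleanup()   # closes 'other' and removes any temporary bundle
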
@@ -1,1603 +1,1603 b''
1 # manifest.py - manifest revision class for mercurial
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import absolute_import
9
10 import array
10 import heapq
11 import os
12 import struct
13
14 from .i18n import _
15 from . import (
16     error,
17     mdiff,
18     parsers,
19     revlog,
20     util,
21 )
22
23 propertycache = util.propertycache
24
25 def _parsev1(data):
26     # This method does a little bit of excessive-looking
27     # precondition checking. This is so that the behavior of this
28     # class exactly matches its C counterpart to try and help
29     # prevent surprise breakage for anyone that develops against
30     # the pure version.
31     if data and data[-1] != '\n':
32         raise ValueError('Manifest did not end in a newline.')
33     prev = None
34     for l in data.splitlines():
35         if prev is not None and prev > l:
36             raise ValueError('Manifest lines not in sorted order.')
37         prev = l
38         f, n = l.split('\0')
39         if len(n) > 40:
40             yield f, revlog.bin(n[:40]), n[40:]
41         else:
42             yield f, revlog.bin(n), ''
43
44 def _parsev2(data):
45     metadataend = data.find('\n')
46     # Just ignore metadata for now
47     pos = metadataend + 1
48     prevf = ''
49     while pos < len(data):
50         end = data.find('\n', pos + 1) # +1 to skip stem length byte
51         if end == -1:
52             raise ValueError('Manifest ended with incomplete file entry.')
53         stemlen = ord(data[pos])
54         items = data[pos + 1:end].split('\0')
55         f = prevf[:stemlen] + items[0]
56         if prevf > f:
57             raise ValueError('Manifest entries not in sorted order.')
58         fl = items[1]
59         # Just ignore metadata (items[2:]) for now
60         n = data[end + 1:end + 21]
61         yield f, n, fl
62         pos = end + 22
63         prevf = f
64
65 def _parse(data):
66     """Generates (path, node, flags) tuples from a manifest text"""
67     if data.startswith('\0'):
68         return iter(_parsev2(data))
69     else:
70         return iter(_parsev1(data))
71
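A small illustration of the v1 entry format consumed above; the node hash is invented (a sketch, not part of this module):

# Each v1 line is '<path>\0<40-hex-node>[flags]\n', sorted by path.
node = 'ab' * 20
text = "bar/baz.py\0%s\nfoo.py\0%sx\n" % (node, node)
for f, n, fl in _parsev1(text):
    print f, fl    # the trailing 'x' marks foo.py as executable
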
72 def _text(it, usemanifestv2):
73     """Given an iterator over (path, node, flags) tuples, returns a manifest
74     text"""
75     if usemanifestv2:
76         return _textv2(it)
77     else:
78         return _textv1(it)
79
80 def _textv1(it):
81     files = []
82     lines = []
83     _hex = revlog.hex
84     for f, n, fl in it:
85         files.append(f)
86         # if this is changed to support newlines in filenames,
87         # be sure to check the templates/ dir again (especially *-raw.tmpl)
88         lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
89
90     _checkforbidden(files)
91     return ''.join(lines)
92
93 def _textv2(it):
94     files = []
95     lines = ['\0\n']
96     prevf = ''
97     for f, n, fl in it:
98         files.append(f)
99         stem = os.path.commonprefix([prevf, f])
100         stemlen = min(len(stem), 255)
101         lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
102         prevf = f
103     _checkforbidden(files)
104     return ''.join(lines)
105
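A round-trip sketch under the same invented data: _textv1 regenerates exactly the text _parsev1 consumed, since bin() and hex() invert each other for lowercase hex:

node = '1f' * 20
text = "a.py\0%s\nb.py\0%sx\n" % (node, node)
assert _textv1(_parsev1(text)) == text
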
106 class lazymanifestiter(object):
107     def __init__(self, lm):
108         self.pos = 0
109         self.lm = lm
110
111     def __iter__(self):
112         return self
113
114     def next(self):
115         try:
116             data, pos = self.lm._get(self.pos)
117         except IndexError:
118             raise StopIteration
119         if pos == -1:
120             self.pos += 1
121             return data[0]
122         self.pos += 1
123         zeropos = data.find('\x00', pos)
124         return data[pos:zeropos]
125
126 class lazymanifestiterentries(object):
127     def __init__(self, lm):
128         self.lm = lm
129         self.pos = 0
130
131     def __iter__(self):
132         return self
133
134     def next(self):
135         try:
136             data, pos = self.lm._get(self.pos)
137         except IndexError:
138             raise StopIteration
139         if pos == -1:
140             self.pos += 1
141             return data
142         zeropos = data.find('\x00', pos)
143         hashval = unhexlify(data, self.lm.extrainfo[self.pos],
144                             zeropos + 1, 40)
145         flags = self.lm._getflags(data, self.pos, zeropos)
146         self.pos += 1
147         return (data[pos:zeropos], hashval, flags)
148
149 def unhexlify(data, extra, pos, length):
150     s = data[pos:pos + length].decode('hex')
151     if extra:
152         s += chr(extra & 0xff)
153     return s
154
155 def _cmp(a, b):
156     return (a > b) - (a < b)
157
158 class _lazymanifest(object):
159     def __init__(self, data, positions=None, extrainfo=None, extradata=None):
160         if positions is None:
161             self.positions = self.findlines(data)
162             self.extrainfo = [0] * len(self.positions)
163             self.data = data
164             self.extradata = []
165         else:
166             self.positions = positions[:]
167             self.extrainfo = extrainfo[:]
168             self.extradata = extradata[:]
169             self.data = data
170
171     def findlines(self, data):
172         if not data:
173             return []
174         pos = data.find("\n")
175         if pos == -1 or data[-1] != '\n':
176             raise ValueError("Manifest did not end in a newline.")
177         positions = [0]
178         prev = data[:data.find('\x00')]
179         while pos < len(data) - 1 and pos != -1:
180             positions.append(pos + 1)
181             nexts = data[pos + 1:data.find('\x00', pos + 1)]
182             if nexts < prev:
183                 raise ValueError("Manifest lines not in sorted order.")
184             prev = nexts
185             pos = data.find("\n", pos + 1)
186         return positions
187
188     def _get(self, index):
189         # get the position encoded in pos:
190         #   positive number is an index in 'data'
191         #   negative number is in extrapieces
192         pos = self.positions[index]
193         if pos >= 0:
194             return self.data, pos
195         return self.extradata[-pos - 1], -1
196
197     def _getkey(self, pos):
198         if pos >= 0:
199             return self.data[pos:self.data.find('\x00', pos + 1)]
200         return self.extradata[-pos - 1][0]
201
202     def bsearch(self, key):
203         first = 0
204         last = len(self.positions) - 1
205
206         while first <= last:
207             midpoint = (first + last)//2
208             nextpos = self.positions[midpoint]
209             candidate = self._getkey(nextpos)
210             r = _cmp(key, candidate)
211             if r == 0:
212                 return midpoint
213             else:
214                 if r < 0:
215                     last = midpoint - 1
216                 else:
217                     first = midpoint + 1
218         return -1
219
220     def bsearch2(self, key):
221         # same as the above, but will always return the position
222         # done for performance reasons
223         first = 0
224         last = len(self.positions) - 1
225
226         while first <= last:
227             midpoint = (first + last)//2
228             nextpos = self.positions[midpoint]
229             candidate = self._getkey(nextpos)
230             r = _cmp(key, candidate)
231             if r == 0:
232                 return (midpoint, True)
233             else:
234                 if r < 0:
235                     last = midpoint - 1
236                 else:
237                     first = midpoint + 1
238         return (first, False)
239
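The two searches differ only in their miss behavior: bsearch returns -1, while bsearch2 still reports the sorted insertion point. A sketch against the pure-Python class above, with invented nodes:

lm = _lazymanifest("a\x00" + "11" * 20 + "\nc\x00" + "22" * 20 + "\n")
assert lm.bsearch("a") == 0
assert lm.bsearch("b") == -1
assert lm.bsearch2("b") == (1, False)   # 'b' would sort between 'a' and 'c'
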
240     def __contains__(self, key):
241         return self.bsearch(key) != -1
242
243     def _getflags(self, data, needle, pos):
244         start = pos + 41
245         end = data.find("\n", start)
246         if end == -1:
247             end = len(data) - 1
248         if start == end:
249             return ''
250         return self.data[start:end]
251
252     def __getitem__(self, key):
253         if not isinstance(key, str):
254             raise TypeError("getitem: manifest keys must be a string.")
255         needle = self.bsearch(key)
256         if needle == -1:
257             raise KeyError
258         data, pos = self._get(needle)
259         if pos == -1:
260             return (data[1], data[2])
261         zeropos = data.find('\x00', pos)
262         assert 0 <= needle <= len(self.positions)
263         assert len(self.extrainfo) == len(self.positions)
264         hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
265         flags = self._getflags(data, needle, zeropos)
266         return (hashval, flags)
267
268     def __delitem__(self, key):
269         needle, found = self.bsearch2(key)
270         if not found:
271             raise KeyError
272         cur = self.positions[needle]
273         self.positions = self.positions[:needle] + self.positions[needle + 1:]
274         self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
275         if cur >= 0:
276             self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
277
278     def __setitem__(self, key, value):
279         if not isinstance(key, str):
280             raise TypeError("setitem: manifest keys must be a string.")
281         if not isinstance(value, tuple) or len(value) != 2:
282             raise TypeError("Manifest values must be a tuple of (node, flags).")
283         hashval = value[0]
284         if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22:
285             raise TypeError("node must be a 20-byte string")
286         flags = value[1]
287         if len(hashval) == 22:
288             hashval = hashval[:-1]
289         if not isinstance(flags, str) or len(flags) > 1:
290             raise TypeError("flags must be a 0 or 1 byte string, got %r"
291                             % flags)
292         needle, found = self.bsearch2(key)
293         if found:
294             # put the item
295             pos = self.positions[needle]
296             if pos < 0:
297                 self.extradata[-pos - 1] = (key, hashval, value[1])
298             else:
299                 # just don't bother
300                 self.extradata.append((key, hashval, value[1]))
301                 self.positions[needle] = -len(self.extradata)
302         else:
303             # not found, put it in with extra positions
304             self.extradata.append((key, hashval, value[1]))
305             self.positions = (self.positions[:needle] + [-len(self.extradata)]
306                               + self.positions[needle:])
307             self.extrainfo = (self.extrainfo[:needle] + [0] +
308                               self.extrainfo[needle:])
309
310     def copy(self):
311         # XXX call _compact like in C?
312         return _lazymanifest(self.data, self.positions, self.extrainfo,
313                              self.extradata)
314
315     def _compact(self):
316         # hopefully not called TOO often
317         if len(self.extradata) == 0:
318             return
319         l = []
320         last_cut = 0
321         i = 0
322         offset = 0
323         self.extrainfo = [0] * len(self.positions)
324         while i < len(self.positions):
325             if self.positions[i] >= 0:
326                 cur = self.positions[i]
327                 last_cut = cur
328                 while True:
329                     self.positions[i] = offset
330                     i += 1
331                     if i == len(self.positions) or self.positions[i] < 0:
332                         break
333                     offset += self.positions[i] - cur
334                     cur = self.positions[i]
335                 end_cut = self.data.find('\n', cur)
336                 if end_cut != -1:
337                     end_cut += 1
338                 offset += end_cut - cur
339                 l.append(self.data[last_cut:end_cut])
340             else:
341                 while i < len(self.positions) and self.positions[i] < 0:
342                     cur = self.positions[i]
343                     t = self.extradata[-cur - 1]
344                     l.append(self._pack(t))
345                     self.positions[i] = offset
346                     if len(t[1]) > 20:
347                         self.extrainfo[i] = ord(t[1][21])
348                     offset += len(l[-1])
349                     i += 1
350         self.data = ''.join(l)
351         self.extradata = []
352
353     def _pack(self, d):
354         return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n'
355
356     def text(self):
357         self._compact()
358         return self.data
359
360     def diff(self, m2, clean=False):
361         '''Finds changes between the current manifest and m2.'''
362         # XXX think whether efficiency matters here
363         diff = {}
364
365         for fn, e1, flags in self.iterentries():
366             if fn not in m2:
367                 diff[fn] = (e1, flags), (None, '')
368             else:
369                 e2 = m2[fn]
370                 if (e1, flags) != e2:
371                     diff[fn] = (e1, flags), e2
372                 elif clean:
373                     diff[fn] = None
374
375         for fn, e2, flags in m2.iterentries():
376             if fn not in self:
377                 diff[fn] = (None, ''), (e2, flags)
378
379         return diff
380
381     def iterentries(self):
382         return lazymanifestiterentries(self)
383
384     def iterkeys(self):
385         return lazymanifestiter(self)
386
387     def __iter__(self):
388         return lazymanifestiter(self)
389
390     def __len__(self):
391         return len(self.positions)
392
393     def filtercopy(self, filterfn):
394         # XXX should be optimized
395         c = _lazymanifest('')
396         for f, n, fl in self.iterentries():
397             if filterfn(f):
398                 c[f] = n, fl
399         return c
400
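A hedged round trip through the pure-Python _lazymanifest above (the C parsers.lazymanifest picked up below behaves the same); the 20-byte nodes are invented. text() triggers _compact(), which folds extradata back into one sorted byte string:

lm = _lazymanifest("a\x00" + "ab" * 20 + "\n")
lm["b"] = ("\x01" * 20, '')
assert "b" in lm
assert lm.text().endswith("b\x00" + "01" * 20 + "\n")
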
400 try:
401     _lazymanifest = parsers.lazymanifest
402 except AttributeError:
403     pass
404
405 class manifestdict(object):
406     def __init__(self, data=''):
407         if data.startswith('\0'):
408             #_lazymanifest can not parse v2
409             self._lm = _lazymanifest('')
410             for f, n, fl in _parsev2(data):
411                 self._lm[f] = n, fl
412         else:
413             self._lm = _lazymanifest(data)
414
415     def __getitem__(self, key):
416         return self._lm[key][0]
417
418     def find(self, key):
419         return self._lm[key]
420
421     def __len__(self):
422         return len(self._lm)
423
424     def __nonzero__(self):
425         # nonzero is covered by the __len__ function, but implementing it here
426         # makes it easier for extensions to override.
427         return len(self._lm) != 0
428
429     def __setitem__(self, key, node):
430         self._lm[key] = node, self.flags(key, '')
431
432     def __contains__(self, key):
433         return key in self._lm
434
435     def __delitem__(self, key):
436         del self._lm[key]
437
438     def __iter__(self):
439         return self._lm.__iter__()
440
441     def iterkeys(self):
442         return self._lm.iterkeys()
443
444     def keys(self):
445         return list(self.iterkeys())
446
447     def filesnotin(self, m2, match=None):
448         '''Set of files in this manifest that are not in the other'''
449         if match:
450             m1 = self.matches(match)
451             m2 = m2.matches(match)
452             return m1.filesnotin(m2)
453         diff = self.diff(m2)
454         files = set(filepath
455                     for filepath, hashflags in diff.iteritems()
456                     if hashflags[1][0] is None)
457         return files
458
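filesnotin() is diff() plus a filter: keep the paths whose node on the other side is None. A tiny invented example:

m1 = manifestdict("a\x00" + "11" * 20 + "\nb\x00" + "22" * 20 + "\n")
m2 = manifestdict("a\x00" + "11" * 20 + "\n")
assert m1.filesnotin(m2) == set(['b'])
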
459     @propertycache
460     def _dirs(self):
461         return util.dirs(self)
462
463     def dirs(self):
464         return self._dirs
465
466     def hasdir(self, dir):
467         return dir in self._dirs
468
469     def _filesfastpath(self, match):
470         '''Checks whether we can correctly and quickly iterate over matcher
471         files instead of over manifest files.'''
472         files = match.files()
473         return (len(files) < 100 and (match.isexact() or
474             (match.prefix() and all(fn in self for fn in files))))
475
476     def walk(self, match):
477         '''Generates matching file names.
478
479         Equivalent to manifest.matches(match).iterkeys(), but without creating
480         an entirely new manifest.
481
482         It also reports nonexistent files by marking them bad with match.bad().
483         '''
484         if match.always():
485             for f in iter(self):
486                 yield f
487             return
488
489         fset = set(match.files())
490
491         # avoid the entire walk if we're only looking for specific files
492         if self._filesfastpath(match):
493             for fn in sorted(fset):
494                 yield fn
495             return
496
497         for fn in self:
498             if fn in fset:
499                 # specified pattern is the exact name
500                 fset.remove(fn)
501             if match(fn):
502                 yield fn
503
504         # for dirstate.walk, files=['.'] means "walk the whole tree".
505         # follow that here, too
506         fset.discard('.')
507
508         for fn in sorted(fset):
509             if not self.hasdir(fn):
510                 match.bad(fn, None)
511
512     def matches(self, match):
513         '''generate a new manifest filtered by the match argument'''
514         if match.always():
515             return self.copy()
516
517         if self._filesfastpath(match):
518             m = manifestdict()
519             lm = self._lm
520             for fn in match.files():
521                 if fn in lm:
522                     m._lm[fn] = lm[fn]
523             return m
524
525         m = manifestdict()
526         m._lm = self._lm.filtercopy(match)
527         return m
528
529     def diff(self, m2, match=None, clean=False):
530         '''Finds changes between the current manifest and m2.
531
532         Args:
533           m2: the manifest to which this manifest should be compared.
534           clean: if true, include files unchanged between these manifests
535                  with a None value in the returned dictionary.
536
537         The result is returned as a dict with filename as key and
538         values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
539         nodeid in the current/other manifest and fl1/fl2 is the flag
540         in the current/other manifest. Where the file does not exist,
541         the nodeid will be None and the flags will be the empty
542         string.
543         '''
544         if match:
545             m1 = self.matches(match)
546             m2 = m2.matches(match)
547             return m1.diff(m2, clean=clean)
548         return self._lm.diff(m2._lm, clean)
549
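The ((n1, fl1), (n2, fl2)) shape documented above, on invented nodes:

m1 = manifestdict("a\x00" + "11" * 20 + "\n")
m2 = manifestdict("a\x00" + "22" * 20 + "\n")
(n1, fl1), (n2, fl2) = m1.diff(m2)['a']
assert n1 != n2 and fl1 == fl2 == ''
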
550     def setflag(self, key, flag):
551         self._lm[key] = self[key], flag
552
553     def get(self, key, default=None):
554         try:
555             return self._lm[key][0]
556         except KeyError:
557             return default
558
559     def flags(self, key, default=''):
560         try:
561             return self._lm[key][1]
562         except KeyError:
563             return default
564
565     def copy(self):
566         c = manifestdict()
567         c._lm = self._lm.copy()
568         return c
569
570     def iteritems(self):
571         return (x[:2] for x in self._lm.iterentries())
572
573     def iterentries(self):
574         return self._lm.iterentries()
575
576     def text(self, usemanifestv2=False):
577         if usemanifestv2:
578             return _textv2(self._lm.iterentries())
579         else:
580             # use (probably) native version for v1
581             return self._lm.text()
582
583     def fastdelta(self, base, changes):
584         """Given a base manifest text as an array.array and a list of changes
585         relative to that text, compute a delta that can be used by revlog.
586         """
587         delta = []
588         dstart = None
589         dend = None
590         dline = [""]
591         start = 0
592         # zero copy representation of base as a buffer
593         addbuf = util.buffer(base)
594
595         changes = list(changes)
596         if len(changes) < 1000:
597             # start with a readonly loop that finds the offset of
598             # each line and creates the deltas
599             for f, todelete in changes:
600                 # bs will either be the index of the item or the insert point
601                 start, end = _msearch(addbuf, f, start)
602                 if not todelete:
603                     h, fl = self._lm[f]
604                     l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
605                 else:
606                     if start == end:
607                         # item we want to delete was not found, error out
608                         raise AssertionError(
609                             _("failed to remove %s from manifest") % f)
610                     l = ""
611                 if dstart is not None and dstart <= start and dend >= start:
612                     if dend < end:
613                         dend = end
614                     if l:
615                         dline.append(l)
616                 else:
617                     if dstart is not None:
618                         delta.append([dstart, dend, "".join(dline)])
619                     dstart = start
620                     dend = end
621                     dline = [l]
622
623             if dstart is not None:
624                 delta.append([dstart, dend, "".join(dline)])
625             # apply the delta to the base, and get a delta for addrevision
626             deltatext, arraytext = _addlistdelta(base, delta)
627         else:
628             # For large changes, it's much cheaper to just build the text and
629             # diff it.
631             arraytext = array.array('c', self.text())
630             arraytext = bytearray(self.text())
632             deltatext = mdiff.textdiff(base, arraytext)
631             deltatext = mdiff.textdiff(
632                 util.buffer(base), util.buffer(arraytext))
633
634         return arraytext, deltatext
635
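The hunk above is the point of this changeset: on Python 2 a bytearray supports the same slicing and in-place concatenation as array.array('c', ...), and unlike the array spelling it also exists on Python 3. A standalone sketch, not taken from this file:

import array                          # only for the before/after comparison
old = array.array('c', 'abc')         # the py2-only spelling being replaced
new = bytearray('abc')                # equivalent, and valid on py3 as b'abc'
new += 'de'                           # in-place append, as _addlistdelta does
assert new[1:4] == bytearray('bcd')   # slicing still yields byte slices
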
636 def _msearch(m, s, lo=0, hi=None):
637     '''return a tuple (start, end) that says where to find s within m.
638
639     If the string is found, m[start:end] is the line containing
640     that string. If start == end the string was not found and
641     they indicate the proper sorted insertion point.
642
643     m should be a buffer or a string
644     s is a string'''
645     def advance(i, c):
646         while i < lenm and m[i] != c:
647             i += 1
648         return i
649     if not s:
650         return (lo, lo)
651     lenm = len(m)
652     if not hi:
653         hi = lenm
654     while lo < hi:
655         mid = (lo + hi) // 2
656         start = mid
657         while start > 0 and m[start - 1] != '\n':
658             start -= 1
659         end = advance(start, '\0')
660         if m[start:end] < s:
661             # we know that after the null there are 40 bytes of sha1
662             # this translates to the bisect lo = mid + 1
663             lo = advance(end + 40, '\n') + 1
664         else:
665             # this translates to the bisect hi = mid
666             hi = start
667     end = advance(lo, '\0')
668     found = m[lo:end]
669     if s == found:
670         # we know that after the null there are 40 bytes of sha1
671         end = advance(end + 40, '\n')
672         return (lo, end + 1)
673     else:
674         return (lo, lo)
675
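A sketch of _msearch() on a two-entry manifest text with invented nodes; a miss returns start == end at the sorted insertion point:

text = "a\x00" + "11" * 20 + "\nc\x00" + "22" * 20 + "\n"
start, end = _msearch(text, "c")
assert text[start:end] == "c\x00" + "22" * 20 + "\n"
start, end = _msearch(text, "b")
assert start == end == 43    # just past the 'a' line
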
676 def _checkforbidden(l):
677     """Check filenames for illegal characters."""
678     for f in l:
679         if '\n' in f or '\r' in f:
680             raise error.RevlogError(
681                 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
682
683
684 # apply the changes collected during the bisect loop to our addlist
685 # return a delta suitable for addrevision
686 def _addlistdelta(addlist, x):
687     # for large addlist arrays, building a new array is cheaper
688     # than repeatedly modifying the existing one
689     currentposition = 0
690     newaddlist = array.array('c')
690     newaddlist = bytearray()
691
692     for start, end, content in x:
693         newaddlist += addlist[currentposition:start]
694         if content:
695             newaddlist += array.array('c', content)
695             newaddlist += bytearray(content)
696
697         currentposition = end
698
699     newaddlist += addlist[currentposition:]
700
701     deltatext = "".join(struct.pack(">lll", start, end, len(content))
702                         + content for start, end, content in x)
703     return deltatext, newaddlist
704
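Each (start, end, content) patch above serializes to a big-endian 12-byte header followed by the replacement bytes, which is the delta layout consumed downstream. A quick check:

hdr = struct.pack(">lll", 0, 5, 3)
assert len(hdr) == 12
assert struct.unpack(">lll", hdr) == (0, 5, 3)
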
705 def _splittopdir(f):
706     if '/' in f:
707         dir, subpath = f.split('/', 1)
708         return dir + '/', subpath
709     else:
710         return '', f
711
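_splittopdir peels off the first path component, keeping its trailing '/':

assert _splittopdir('a/b/c') == ('a/', 'b/c')
assert _splittopdir('top-level') == ('', 'top-level')
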
712 _noop = lambda s: None
713
714 class treemanifest(object):
715     def __init__(self, dir='', text=''):
716         self._dir = dir
717         self._node = revlog.nullid
718         self._loadfunc = _noop
719         self._copyfunc = _noop
720         self._dirty = False
721         self._dirs = {}
722         # Using _lazymanifest here is a little slower than plain old dicts
723         self._files = {}
724         self._flags = {}
725         if text:
726             def readsubtree(subdir, subm):
727                 raise AssertionError('treemanifest constructor only accepts '
728                                      'flat manifests')
729             self.parse(text, readsubtree)
730             self._dirty = True # Mark flat manifest dirty after parsing
731
732     def _subpath(self, path):
733         return self._dir + path
734
735     def __len__(self):
736         self._load()
737         size = len(self._files)
738         for m in self._dirs.values():
739             size += m.__len__()
740         return size
741
742     def _isempty(self):
743         self._load() # for consistency; already loaded by all callers
744         return (not self._files and (not self._dirs or
745                 all(m._isempty() for m in self._dirs.values())))
746
747     def __repr__(self):
748         return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
749                 (self._dir, revlog.hex(self._node),
750                  bool(self._loadfunc is _noop),
751                  self._dirty, id(self)))
752
753     def dir(self):
754         '''The directory that this tree manifest represents, including a
755         trailing '/'. Empty string for the repo root directory.'''
756         return self._dir
757
758     def node(self):
759         '''The node of this instance. nullid for unsaved instances. Should
760 be updated when the instance is read or written from a revlog.
760 be updated when the instance is read or written from a revlog.
761 '''
761 '''
762 assert not self._dirty
762 assert not self._dirty
763 return self._node
763 return self._node
764
764
765 def setnode(self, node):
765 def setnode(self, node):
766 self._node = node
766 self._node = node
767 self._dirty = False
767 self._dirty = False
768
768
769 def iterentries(self):
769 def iterentries(self):
770 self._load()
770 self._load()
771 for p, n in sorted(self._dirs.items() + self._files.items()):
771 for p, n in sorted(self._dirs.items() + self._files.items()):
772 if p in self._files:
772 if p in self._files:
773 yield self._subpath(p), n, self._flags.get(p, '')
773 yield self._subpath(p), n, self._flags.get(p, '')
774 else:
774 else:
775 for x in n.iterentries():
775 for x in n.iterentries():
776 yield x
776 yield x
777
777
778 def iteritems(self):
778 def iteritems(self):
779 self._load()
779 self._load()
780 for p, n in sorted(self._dirs.items() + self._files.items()):
780 for p, n in sorted(self._dirs.items() + self._files.items()):
781 if p in self._files:
781 if p in self._files:
782 yield self._subpath(p), n
782 yield self._subpath(p), n
783 else:
783 else:
784 for f, sn in n.iteritems():
784 for f, sn in n.iteritems():
785 yield f, sn
785 yield f, sn
786
786
787 def iterkeys(self):
787 def iterkeys(self):
788 self._load()
788 self._load()
789 for p in sorted(self._dirs.keys() + self._files.keys()):
789 for p in sorted(self._dirs.keys() + self._files.keys()):
790 if p in self._files:
790 if p in self._files:
791 yield self._subpath(p)
791 yield self._subpath(p)
792 else:
792 else:
793 for f in self._dirs[p].iterkeys():
793 for f in self._dirs[p].iterkeys():
794 yield f
794 yield f
795
795
796 def keys(self):
796 def keys(self):
797 return list(self.iterkeys())
797 return list(self.iterkeys())
798
798
799 def __iter__(self):
799 def __iter__(self):
800 return self.iterkeys()
800 return self.iterkeys()
801
801
802 def __contains__(self, f):
802 def __contains__(self, f):
803 if f is None:
803 if f is None:
804 return False
804 return False
805 self._load()
805 self._load()
806 dir, subpath = _splittopdir(f)
806 dir, subpath = _splittopdir(f)
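        # Editorial note: _splittopdir (defined elsewhere in this module)
        # splits off the leading path component, e.g. 'a/b/c' -> ('a/', 'b/c')
        # and 'topfile' -> ('', 'topfile'); this is inferred from its uses
        # below rather than stated by this changeset.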
        if dir:
            if dir not in self._dirs:
                return False
            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f, default=None):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]

    def flags(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                return ''
            return self._dirs[dir].flags(subpath)
        else:
            if f in self._dirs:
                return ''
            return self._flags.get(f, '')

    def find(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, '')

    def __delitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]
        self._dirty = True

    def __setitem__(self, f, n):
        assert n is not None
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(self._subpath(dir))
            self._dirs[dir].__setitem__(subpath, n)
        else:
            self._files[f] = n[:21] # to match manifestdict's behavior
        self._dirty = True

    def _load(self):
        if self._loadfunc is not _noop:
            lf, self._loadfunc = self._loadfunc, _noop
            lf(self)
        elif self._copyfunc is not _noop:
            cf, self._copyfunc = self._copyfunc, _noop
            cf(self)
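        # Editorial note: the pending load/copy callable is swapped out for
        # _noop before it is invoked, so _load() appears designed to run the
        # expensive work at most once, even if re-entered while populating.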

    def setflag(self, f, flags):
        """Set the flags (symlink, executable) for path f."""
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(self._subpath(dir))
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags
        self._dirty = True

    def copy(self):
        copy = treemanifest(self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:
            def _copyfunc(s):
                self._load()
                for d in self._dirs:
                    s._dirs[d] = self._dirs[d].copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)
            if self._loadfunc is _noop:
                _copyfunc(copy)
            else:
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy

    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match:
            m1 = self.matches(match)
            m2 = m2.matches(match)
            return m1.filesnotin(m2)

        files = set()
        def _filesnotin(t1, t2):
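            # If both trees point at the same revlog node and neither has
            # unwritten changes, their contents must match, so the whole
            # subtree can be skipped (the same short-circuit as in diff()).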
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            for d, m1 in t1._dirs.iteritems():
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    files.update(m1.iterkeys())

            for fn in t1._files.iterkeys():
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self):
        return util.dirs(self)

    def dirs(self):
        return self._alldirs

    def hasdir(self, dir):
        self._load()
        topdir, subdir = _splittopdir(dir)
        if topdir:
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
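        # Keys in self._dirs carry a trailing '/' (see parse() below), hence
        # the concatenation when testing a bare directory name.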
        return (dir + '/') in self._dirs

    def walk(self, match):
        '''Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        '''
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        for fn in self._walk(match):
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            yield fn

        # for dirstate.walk, files=['.'] means "walk the whole tree".
        # follow that here, too
        fset.discard('.')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _walk(self, match):
        '''Recursively generates matching file names for walk().'''
        if not match.visitdir(self._dir[:-1] or '.'):
            return

        # yield this dir's files and walk its submanifests
        self._load()
        for p in sorted(self._dirs.keys() + self._files.keys()):
            if p in self._files:
                fullp = self._subpath(p)
                if match(fullp):
                    yield fullp
            else:
                for f in self._dirs[p]._walk(match):
                    yield f

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        return self._matches(match)

    def _matches(self, match):
        '''recursively generate a new manifest filtered by the match argument.
        '''

        visit = match.visitdir(self._dir[:-1] or '.')
        if visit == 'all':
            return self.copy()
        ret = treemanifest(self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            fullp = self._subpath(fn)
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        for dir, subm in self._dirs.iteritems():
            m = subm._matches(match)
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret

    def diff(self, m2, match=None, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
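        # A hypothetical illustration of the result shape (made-up paths,
        # abbreviated nodes):
        #   {'a/x.py': ((n1, ''), (n2, '')),     # content changed
        #    'b/y.py': ((None, ''), (n2, 'x')),  # only in m2, executable
        #    'c/z.py': None}                     # unchanged, with clean=True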
        if match:
            m1 = self.matches(match)
            m2 = m2.matches(match)
            return m1.diff(m2, clean=clean)
        result = {}
        emptytree = treemanifest()
        def _diff(t1, t2):
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            for d, m1 in t1._dirs.iteritems():
                m2 = t2._dirs.get(d, emptytree)
                _diff(m1, m2)

            for d, m2 in t2._dirs.iteritems():
                if d not in t1._dirs:
                    _diff(emptytree, m2)

            for fn, n1 in t1._files.iteritems():
                fl1 = t1._flags.get(fn, '')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, '')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in t2._files.iteritems():
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, '')
                    result[t2._subpath(fn)] = ((None, ''), (n2, fl2))

        _diff(self, m2)
        return result

    def unmodifiedsince(self, m2):
        return not self._dirty and not m2._dirty and self._node == m2._node

    def parse(self, text, readsubtree):
        for f, n, fl in _parse(text):
            if fl == 't':
                f = f + '/'
                self._dirs[f] = readsubtree(self._subpath(f), n)
            elif '/' in f:
                # This is a flat manifest, so use __setitem__ and setflag rather
                # than assigning directly to _files and _flags, so we can
                # assign a path in a subdirectory, and to mark dirty (compared
                # to nullid).
                self[f] = n
                if fl:
                    self.setflag(f, fl)
            else:
                # Assigning to _files and _flags avoids marking as dirty,
                # and should be a little faster.
                self._files[f] = n
                if fl:
                    self._flags[f] = fl

    def text(self, usemanifestv2=False):
        """Get the full data of this manifest as a bytestring."""
        self._load()
        return _text(self.iterentries(), usemanifestv2)

    def dirtext(self, usemanifestv2=False):
        """Get the full data of this directory as a bytestring. Make sure that
        any submanifests have been written first, so their nodeids are correct.
        """
        self._load()
        flags = self.flags
        dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
        files = [(f, self._files[f], flags(f)) for f in self._files]
        return _text(sorted(dirs + files), usemanifestv2)

    def read(self, gettext, readsubtree):
        def _load_for_read(s):
            s.parse(gettext(), readsubtree)
            s._dirty = False
        self._loadfunc = _load_for_read

    def writesubtrees(self, m1, m2, writesubtree):
        self._load() # for consistency; should never have any effect here
        m1._load()
        m2._load()
        emptytree = treemanifest()
        for d, subm in self._dirs.iteritems():
            subp1 = m1._dirs.get(d, emptytree)._node
            subp2 = m2._dirs.get(d, emptytree)._node
            if subp1 == revlog.nullid:
                subp1, subp2 = subp2, subp1
            writesubtree(subm, subp1, subp2)

class manifestrevlog(revlog.revlog):
    '''A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.
    '''
    def __init__(self, opener, dir='', dirlogcache=None, indexfile=None):
        """Constructs a new manifest revlog

        `indexfile` - used by extensions to have two manifests at once, like
        when transitioning between flat manifests and treemanifests.
        """
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        usetreemanifest = False
        usemanifestv2 = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get('manifestcachesize', cachesize)
            usetreemanifest = opts.get('treemanifest', usetreemanifest)
            usemanifestv2 = opts.get('manifestv2', usemanifestv2)

        self._treeondisk = usetreemanifest
        self._usemanifestv2 = usemanifestv2

        self._fulltextcache = util.lrucachedict(cachesize)

        if dir:
            assert self._treeondisk, 'opts is %r' % opts
            if not dir.endswith('/'):
                dir = dir + '/'

        if indexfile is None:
            indexfile = '00manifest.i'
            if dir:
                indexfile = "meta/" + dir + indexfile

        self._dir = dir
        # The dirlogcache is kept on the root manifest log
        if dir:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {'': self}

        super(manifestrevlog, self).__init__(opener, indexfile,
                                             checkambig=bool(dir))

    @property
    def fulltextcache(self):
        return self._fulltextcache

    def clearcaches(self):
        super(manifestrevlog, self).clearcaches()
        self._fulltextcache.clear()
        self._dirlogcache = {'': self}

    def dirlog(self, dir):
        if dir:
            assert self._treeondisk
        if dir not in self._dirlogcache:
            self._dirlogcache[dir] = manifestrevlog(self.opener, dir,
                                                    self._dirlogcache)
        return self._dirlogcache[dir]

    def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
        if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta')
            and not self._usemanifestv2):
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge([(x, False) for x in added],
                               [(x, True) for x in removed])

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self.rev(p1), deltatext
            text = util.buffer(arraytext)
            n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
        else:
            # The first parent manifest isn't already loaded, so we'll
            # just encode a fulltext of the manifest and pass that
            # through to the revlog layer, and let it handle the delta
            # process.
            if self._treeondisk:
                assert readtree, "readtree must be set for treemanifest writes"
                m1 = readtree(self._dir, p1)
                m2 = readtree(self._dir, p2)
                n = self._addtree(m, transaction, link, m1, m2, readtree)
                arraytext = None
            else:
                text = m.text(self._usemanifestv2)
                n = self.addrevision(text, transaction, link, p1, p2)
-                arraytext = array.array('c', text)
+                arraytext = bytearray(text)
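                # Editorial note on the change above (this changeset's point):
                # the 'c' typecode for array.array does not exist on Python 3,
                # while bytearray(text) accepts a bytes object on both Python
                # 2 and 3 and remains a mutable byte buffer usable by the
                # fulltextcache/fastdelta path.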

        if arraytext is not None:
            self.fulltextcache[n] = arraytext

        return n

    def _addtree(self, m, transaction, link, m1, m2, readtree):
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        if self._dir != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(m2)):
            return m.node()
        def writesubtree(subm, subp1, subp2):
            sublog = self.dirlog(subm.dir())
            sublog.add(subm, transaction, link, subp1, subp2, None, None,
                       readtree=readtree)
        m.writesubtrees(m1, m2, writesubtree)
        text = m.dirtext(self._usemanifestv2)
        n = None
        if self._dir != '':
            # Double-check whether contents are unchanged relative to one
            # parent
            if text == m1.dirtext(self._usemanifestv2):
                n = m1.node()
            elif text == m2.dirtext(self._usemanifestv2):
                n = m2.node()

        if not n:
            n = self.addrevision(text, transaction, link, m1.node(), m2.node())

        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n

class manifestlog(object):
    """A collection class representing the collection of manifest snapshots
    referenced by commits in the repository.

    In this situation, 'manifest' refers to the abstract concept of a snapshot
    of the list of files in the given commit. Consumers of the output of this
    class do not care about the implementation details of the actual manifests
    they receive (i.e. tree or flat or lazily loaded, etc)."""
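    # A sketch of typical use, assuming the repository object exposes this
    # collection as repo.manifestlog ('node' is a hypothetical manifest node):
    #   mfl = repo.manifestlog
    #   mctx = mfl[node]    # a manifestctx or treemanifestctx
    #   mf = mctx.read()    # the manifestdict / treemanifest contents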
    def __init__(self, opener, repo):
        usetreemanifest = False
        cachesize = 4

        opts = getattr(opener, 'options', None)
        if opts is not None:
            usetreemanifest = opts.get('treemanifest', usetreemanifest)
            cachesize = opts.get('manifestcachesize', cachesize)
        self._treeinmem = usetreemanifest

        self._oldmanifest = repo._constructmanifest()
        self._revlog = self._oldmanifest

        # A cache of the manifestctx or treemanifestctx for each directory
        self._dirmancache = {}
        self._dirmancache[''] = util.lrucachedict(cachesize)

        self.cachesize = cachesize

    def __getitem__(self, node):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.
        """
        return self.get('', node)

    def get(self, dir, node, verify=True):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.

        `verify` - if True an exception will be thrown if the node is not in
                   the revlog
        """
        if node in self._dirmancache.get(dir, ()):
            cachemf = self._dirmancache[dir][node]
            # The old manifest may put non-ctx manifests in the cache, so
            # skip those since they don't implement the full api.
            if (isinstance(cachemf, manifestctx) or
                isinstance(cachemf, treemanifestctx)):
                return cachemf

        if dir:
            if self._revlog._treeondisk:
                if verify:
                    dirlog = self._revlog.dirlog(dir)
                    if node not in dirlog.nodemap:
                        raise LookupError(node, dirlog.indexfile,
                                          _('no node'))
                m = treemanifestctx(self, dir, node)
            else:
                raise error.Abort(
                    _("cannot ask for manifest directory '%s' in a flat "
                      "manifest") % dir)
        else:
            if verify:
                if node not in self._revlog.nodemap:
                    raise LookupError(node, self._revlog.indexfile,
                                      _('no node'))
            if self._treeinmem:
                m = treemanifestctx(self, '', node)
            else:
                m = manifestctx(self, node)

        if node != revlog.nullid:
            mancache = self._dirmancache.get(dir)
            if not mancache:
                mancache = util.lrucachedict(self.cachesize)
                self._dirmancache[dir] = mancache
            mancache[node] = m
        return m

    def clearcaches(self):
        self._dirmancache.clear()
        self._revlog.clearcaches()

class memmanifestctx(object):
    def __init__(self, manifestlog):
        self._manifestlog = manifestlog
        self._manifestdict = manifestdict()

    def _revlog(self):
        return self._manifestlog._revlog

    def new(self):
        return memmanifestctx(self._manifestlog)

    def copy(self):
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    def read(self):
        return self._manifestdict

    def write(self, transaction, link, p1, p2, added, removed):
        return self._revlog().add(self._manifestdict, transaction, link, p1, p2,
                                  added, removed)

class manifestctx(object):
    """A class representing a single revision of a manifest, including its
    contents, its parent revs, and its linkrev.
    """
    def __init__(self, manifestlog, node):
        self._manifestlog = manifestlog
        self._data = None

        self._node = node

        # TODO: We eventually want p1, p2, and linkrev exposed on this class,
        # but let's add it later when something needs it and we can load it
        # lazily.
        #self.p1, self.p2 = revlog.parents(node)
        #rev = revlog.rev(node)
        #self.linkrev = revlog.linkrev(rev)

    def _revlog(self):
        return self._manifestlog._revlog

    def node(self):
        return self._node

    def new(self):
        return memmanifestctx(self._manifestlog)

    def copy(self):
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._revlog().parents(self._node)

    def read(self):
        if self._data is None:
            if self._node == revlog.nullid:
                self._data = manifestdict()
            else:
                rl = self._revlog()
                text = rl.revision(self._node)
-                arraytext = array.array('c', text)
+                arraytext = bytearray(text)
                rl._fulltextcache[self._node] = arraytext
                self._data = manifestdict(text)
        return self._data

    def readfast(self, shallow=False):
        '''Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, nothing changes since this is a flat manifest.
        '''
        rl = self._revlog()
        r = rl.rev(self._node)
        deltaparent = rl.deltaparent(r)
        if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
            return self.readdelta()
        return self.read()

    def readdelta(self, shallow=False):
        '''Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to read
        if the revlog delta is already p1.

        Changing the value of `shallow` has no effect on flat manifests.
        '''
        revlog = self._revlog()
        if revlog._usemanifestv2:
            # Need to perform a slow delta
            r0 = revlog.deltaparent(revlog.rev(self._node))
            m0 = self._manifestlog[revlog.node(r0)].read()
            m1 = self.read()
            md = manifestdict()
            for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
                if n1:
                    md[f] = n1
                    if fl1:
                        md.setflag(f, fl1)
            return md

        r = revlog.rev(self._node)
        d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
        return manifestdict(d)

    def find(self, key):
        return self.read().find(key)

class memtreemanifestctx(object):
    def __init__(self, manifestlog, dir=''):
        self._manifestlog = manifestlog
        self._dir = dir
        self._treemanifest = treemanifest()

    def _revlog(self):
        return self._manifestlog._revlog

    def new(self, dir=''):
        return memtreemanifestctx(self._manifestlog, dir=dir)

    def copy(self):
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self._treemanifest.copy()
        return memmf

    def read(self):
        return self._treemanifest

    def write(self, transaction, link, p1, p2, added, removed):
        def readtree(dir, node):
            return self._manifestlog.get(dir, node).read()
        return self._revlog().add(self._treemanifest, transaction, link, p1, p2,
                                  added, removed, readtree=readtree)

class treemanifestctx(object):
    def __init__(self, manifestlog, dir, node):
        self._manifestlog = manifestlog
        self._dir = dir
        self._data = None

        self._node = node

        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
        # we can instantiate treemanifestctx objects for directories we don't
        # have on disk.
        #self.p1, self.p2 = revlog.parents(node)
        #rev = revlog.rev(node)
        #self.linkrev = revlog.linkrev(rev)

    def _revlog(self):
        return self._manifestlog._revlog.dirlog(self._dir)

    def read(self):
        if self._data is None:
            rl = self._revlog()
            if self._node == revlog.nullid:
                self._data = treemanifest()
            elif rl._treeondisk:
                m = treemanifest(dir=self._dir)
                def gettext():
                    return rl.revision(self._node)
                def readsubtree(dir, subm):
                    # Set verify to False since we need to be able to create
                    # subtrees for trees that don't exist on disk.
                    return self._manifestlog.get(dir, subm, verify=False).read()
                m.read(gettext, readsubtree)
                m.setnode(self._node)
                self._data = m
            else:
                text = rl.revision(self._node)
-                arraytext = array.array('c', text)
+                arraytext = bytearray(text)
                rl.fulltextcache[self._node] = arraytext
                self._data = treemanifest(dir=self._dir, text=text)

        return self._data

    def node(self):
        return self._node

    def new(self, dir=''):
        return memtreemanifestctx(self._manifestlog, dir=dir)

    def copy(self):
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._revlog().parents(self._node)

    def readdelta(self, shallow=False):
        '''Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to read
        if the revlog delta is already p1.

        If `shallow` is True, this will read the delta for this directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifest, i.e.
        the subdirectory will be reported among files and distinguished only by
        its 't' flag.
        '''
        revlog = self._revlog()
        if shallow and not revlog._usemanifestv2:
            r = revlog.rev(self._node)
            d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
            return manifestdict(d)
        else:
            # Need to perform a slow delta
            r0 = revlog.deltaparent(revlog.rev(self._node))
            m0 = self._manifestlog.get(self._dir, revlog.node(r0)).read()
            m1 = self.read()
            md = treemanifest(dir=self._dir)
            for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
                if n1:
                    md[f] = n1
                    if fl1:
                        md.setflag(f, fl1)
            return md

    def readfast(self, shallow=False):
        '''Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, it only returns the entries from this manifest,
        and not any submanifests.
        '''
        rl = self._revlog()
        r = rl.rev(self._node)
        deltaparent = rl.deltaparent(r)
        if (deltaparent != revlog.nullrev and
            deltaparent in rl.parentrevs(r)):
            return self.readdelta(shallow=shallow)

        if shallow:
            return manifestdict(rl.revision(self._node))
        else:
            return self.read()

    def find(self, key):
        return self.read().find(key)
@@ -1,569 +1,568 @@
# tags.py - read tag info from local repository
#
# Copyright 2009 Matt Mackall <mpm@selenic.com>
# Copyright 2009 Greg Ward <greg@gerg.ca>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# Currently this module only deals with reading and caching tags.
# Eventually, it could take care of updating (adding/removing/moving)
# tags too.

from __future__ import absolute_import

import array
import errno

from .node import (
    bin,
    hex,
    nullid,
    short,
)
from . import (
    encoding,
    error,
    scmutil,
    util,
)

array = array.array

# Tags computation can be expensive and caches exist to make it fast in
# the common case.
#
# The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
# each revision in the repository. The file is effectively an array of
# fixed length records. Read the docs for "hgtagsfnodescache" for technical
# details.
#
# The .hgtags filenode cache grows in proportion to the length of the
# changelog. The file is truncated when the changelog is stripped.
#
# The purpose of the filenode cache is to avoid the most expensive part
# of finding global tags, which is looking up the .hgtags filenode in the
# manifest for each head. This can take dozens of milliseconds, or over
# 100ms for repositories with very large manifests. Multiplied by dozens
# or even hundreds of heads, this is a significant performance concern.
#
# There also exists a separate cache file for each repository filter.
# These "tags-*" files store information about the history of tags.
#
# The tags cache files consist of a cache validation line followed by
# a history of tags.
#
# The cache validation line has the format:
#
#   <tiprev> <tipnode> [<filteredhash>]
#
# <tiprev> is an integer revision and <tipnode> is a 40 character hex
# node for that changeset. These redundantly identify the repository
# tip from the time the cache was written. In addition, <filteredhash>,
# if present, is a 40 character hex hash of the contents of the filtered
# revisions for this filter. If the set of filtered revs changes, the
# hash will change and invalidate the cache.
#
# The history part of the tags cache consists of lines of the form:
#
#   <node> <tag>
#
# (This format is identical to that of .hgtags files.)
#
# <tag> is the tag name and <node> is the 40 character hex changeset
# the tag is associated with.
#
# Tags are written sorted by tag name.
#
# Tags associated with multiple changesets have an entry for each changeset.
# The most recent changeset (in terms of revlog ordering for the head
# setting it) for each tag is last.
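#
# As a hypothetical illustration (made-up values, 40-character hex nodes
# abbreviated with '...'), a complete cache file might read:
#
#   4532 e62f... a9b3...
#   07c1... v1.0
#   5d88... v1.1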

def findglobaltags(ui, repo, alltags, tagtypes):
    '''Find global tags in a repo.

    "alltags" maps tag name to (node, hist) 2-tuples.

    "tagtypes" maps tag name to tag type. Global tags always have the
    "global" tag type.

    The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
    should be passed in.

    The tags cache is read and updated as a side-effect of calling.
    '''
    # This is so we can be lazy and assume alltags contains only global
    # tags when we pass it to _writetagcache().
    assert len(alltags) == len(tagtypes) == 0, \
        "findglobaltags() should be called first"

    (heads, tagfnode, valid, cachetags, shouldwrite) = _readtagcache(ui, repo)
    if cachetags is not None:
        assert not shouldwrite
        # XXX is this really 100% correct? are there oddball special
        # cases where a global tag should outrank a local tag but won't,
        # because cachetags does not contain rank info?
        _updatetags(cachetags, 'global', alltags, tagtypes)
        return

    seen = set() # set of fnode
    fctx = None
    for head in reversed(heads): # oldest to newest
        assert head in repo.changelog.nodemap, \
            "tag cache returned bogus head %s" % short(head)

        fnode = tagfnode.get(head)
        if fnode and fnode not in seen:
            seen.add(fnode)
            if not fctx:
                fctx = repo.filectx('.hgtags', fileid=fnode)
            else:
                fctx = fctx.filectx(fnode)

            filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
            _updatetags(filetags, 'global', alltags, tagtypes)

    # and update the cache (if necessary)
    if shouldwrite:
        _writetagcache(ui, repo, valid, alltags)
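
# Editor's usage sketch (hypothetical; assumes `ui` and `repo` come from a
# Mercurial command context): global tags must be read into empty dicts
# before local tags are merged in.
def _examplereadtags(ui, repo):
    alltags = {}   # maps tag name -> (node, hist)
    tagtypes = {}  # maps tag name -> 'global' or 'local'
    findglobaltags(ui, repo, alltags, tagtypes)  # must run first
    readlocaltags(ui, repo, alltags, tagtypes)
    return alltags, tagtypes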

def readlocaltags(ui, repo, alltags, tagtypes):
    '''Read local tags in repo. Update alltags and tagtypes.'''
    try:
        data = repo.vfs.read("localtags")
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        return

    # localtags is in the local encoding; re-encode to UTF-8 on
    # input for consistency with the rest of this module.
    filetags = _readtags(
        ui, repo, data.splitlines(), "localtags",
        recode=encoding.fromlocal)

    # remove tags pointing to invalid nodes
    cl = repo.changelog
    for t in filetags.keys():
        try:
            cl.rev(filetags[t][0])
        except (LookupError, ValueError):
            del filetags[t]

    _updatetags(filetags, "local", alltags, tagtypes)

def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
    '''Read tag definitions from a file (or any source of lines).

    This function returns two sortdicts with similar information:

    - the first dict, bintaghist, contains the tag information as expected
      by the _readtags function, i.e. for each tag name the ordered list of
      nodes from which _readtags derives (node, hist):
        - node is the node id from the last line read for that name,
        - hist is the list of node ids previously associated with it (in file
          order). All node ids are binary, not hex.

    - the second dict, hextaglines, is a mapping from tag name to a list of
      [hexnode, line number] pairs, ordered from the oldest to the newest node.

    When calcnodelines is False the hextaglines dict is not calculated (an
    empty dict is returned). This is done to improve this function's
    performance in cases where the line numbers are not needed.
    '''

    bintaghist = util.sortdict()
    hextaglines = util.sortdict()
    count = 0

    def dbg(msg):
        ui.debug("%s, line %s: %s\n" % (fn, count, msg))

    for nline, line in enumerate(lines):
        count += 1
        if not line:
            continue
        try:
            (nodehex, name) = line.split(" ", 1)
        except ValueError:
            dbg("cannot parse entry")
            continue
        name = name.strip()
        if recode:
            name = recode(name)
        try:
            nodebin = bin(nodehex)
        except TypeError:
            dbg("node '%s' is not well formed" % nodehex)
            continue

        # update filetags
        if calcnodelines:
            # map tag name to a list of line numbers
            if name not in hextaglines:
                hextaglines[name] = []
            hextaglines[name].append([nodehex, nline])
            continue
        # map tag name to (node, hist)
        if name not in bintaghist:
            bintaghist[name] = []
        bintaghist[name].append(nodebin)
    return bintaghist, hextaglines
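
# Editor's illustration (hypothetical; nodes invented): two .hgtags-style
# lines tagging "v1.0" twice. With calcnodelines left False, the second
# returned dict stays empty; _readtags() would then fold the list into
# (bin('22' * 20), [bin('11' * 20)]).
def _examplereadtaghist(ui, repo):
    lines = ['%s v1.0' % ('11' * 20),
             '%s v1.0' % ('22' * 20)]
    bintaghist, hextaglines = _readtaghist(ui, repo, lines, 'example')
    # bintaghist == {'v1.0': [bin('11' * 20), bin('22' * 20)]}
    # hextaglines == {}
    return bintaghist, hextaglines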

def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
    '''Read tag definitions from a file (or any source of lines).

    Returns a mapping from tag name to (node, hist).

    "node" is the node id from the last line read for that name. "hist"
    is the list of node ids previously associated with it (in file order).
    All node ids are binary, not hex.
    '''
    filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
                                       calcnodelines=calcnodelines)
    # util.sortdict().__setitem__ is much slower at replacing than at
    # inserting new entries. The difference can matter if there are
    # thousands of tags. Create a new sortdict to avoid the performance
    # penalty.
    newtags = util.sortdict()
    for tag, taghist in filetags.items():
        newtags[tag] = (taghist[-1], taghist[:-1])
    return newtags

def _updatetags(filetags, tagtype, alltags, tagtypes):
    '''Incorporate the tag info read from one file into the two
    dictionaries, alltags and tagtypes, that contain all tag
    info (global across all heads plus local).'''

    for name, nodehist in filetags.iteritems():
        if name not in alltags:
            alltags[name] = nodehist
            tagtypes[name] = tagtype
            continue

        # we prefer alltags[name] if:
        #  it supersedes us OR
        #  mutual supersedes and it has a higher rank
        # otherwise we win because we're tip-most
        anode, ahist = nodehist
        bnode, bhist = alltags[name]
        if (bnode != anode and anode in bhist and
            (bnode not in ahist or len(bhist) > len(ahist))):
            anode = bnode
        else:
            tagtypes[name] = tagtype
        ahist.extend([n for n in bhist if n not in ahist])
        alltags[name] = anode, ahist
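
# Editor's illustration of the precedence rule above (nodes invented): a
# newer entry whose history supersedes the older one wins regardless of
# read order.
def _exampleupdatetags():
    a, b = bin('11' * 20), bin('22' * 20)
    alltags = {'v1.0': (a, [])}      # read first, from an older head
    tagtypes = {'v1.0': 'global'}
    filetags = {'v1.0': (b, [a])}    # read second; b supersedes a
    _updatetags(filetags, 'global', alltags, tagtypes)
    # alltags['v1.0'] == (b, [a])
    return alltags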

def _filename(repo):
    """name of a tagcache file for a given repo or repoview"""
    filename = 'cache/tags2'
    if repo.filtername:
        filename = '%s-%s' % (filename, repo.filtername)
    return filename

def _readtagcache(ui, repo):
    '''Read the tag cache.

    Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).

    If the cache is completely up to date, "cachetags" is a dict of the
    form returned by _readtags() and "heads", "fnodes", and "validinfo" are
    None and "shouldwrite" is False.

    If the cache is not up to date, "cachetags" is None. "heads" is a list
    of all heads currently in the repository, ordered from tip to oldest.
    "validinfo" is a tuple describing cache validation info. This is used
    when writing the tags cache. "fnodes" is a mapping from head to .hgtags
    filenode. "shouldwrite" is True.

    If the cache is not up to date, the caller is responsible for reading tag
    info from each returned head. (See findglobaltags().)
    '''
    try:
        cachefile = repo.vfs(_filename(repo), 'r')
        # force reading the file for static-http
        cachelines = iter(cachefile)
    except IOError:
        cachefile = None

    cacherev = None
    cachenode = None
    cachehash = None
    if cachefile:
        try:
            validline = next(cachelines)
            validline = validline.split()
            cacherev = int(validline[0])
            cachenode = bin(validline[1])
            if len(validline) > 2:
                cachehash = bin(validline[2])
        except Exception:
            # corruption of the cache, just recompute it.
            pass

    tipnode = repo.changelog.tip()
    tiprev = len(repo.changelog) - 1

    # Case 1 (common): tip is the same, so nothing has changed.
    # (Unchanged tip trivially means no changesets have been added.
    # But, thanks to localrepository.destroyed(), it also means none
    # have been destroyed by strip or rollback.)
    if (cacherev == tiprev
            and cachenode == tipnode
            and cachehash == scmutil.filteredhash(repo, tiprev)):
        tags = _readtags(ui, repo, cachelines, cachefile.name)
        cachefile.close()
        return (None, None, None, tags, False)
    if cachefile:
        cachefile.close() # ignore rest of file

    valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))

    repoheads = repo.heads()
    # Case 2 (uncommon): empty repo; get out quickly and don't bother
    # writing an empty cache.
    if repoheads == [nullid]:
        return ([], {}, valid, {}, False)

    # Case 3 (uncommon): cache file missing or empty.

    # Case 4 (uncommon): tip rev decreased. This should only happen
    # when we're called from localrepository.destroyed(). Refresh the
    # cache so future invocations will not see disappeared heads in the
    # cache.

    # Case 5 (common): tip has changed, so we've added/replaced heads.

    # As it happens, the code to handle cases 3, 4, 5 is the same.

    # N.B. in case 4 (nodes destroyed), "new head" really means "newly
    # exposed".
    if not len(repo.file('.hgtags')):
        # No tags have ever been committed, so we can avoid a
        # potentially expensive search.
        return ([], {}, valid, None, True)

    starttime = util.timer()

    # Now we have to look up the .hgtags filenode for every new head.
    # This is the most expensive part of finding tags, so performance
    # depends primarily on the size of newheads. Worst case: no cache
    # file, so newheads == repoheads.
    fnodescache = hgtagsfnodescache(repo.unfiltered())
    cachefnode = {}
    for head in reversed(repoheads):
        fnode = fnodescache.getfnode(head)
        if fnode != nullid:
            cachefnode[head] = fnode

    fnodescache.write()

    duration = util.timer() - starttime
    ui.log('tagscache',
           '%d/%d cache hits/lookups in %0.4f '
           'seconds\n',
           fnodescache.hitcount, fnodescache.lookupcount, duration)

    # Caller has to iterate over all heads, but can use the filenodes in
    # cachefnode to get to each .hgtags revision quickly.
    return (repoheads, cachefnode, valid, None, True)

def _writetagcache(ui, repo, valid, cachetags):
    filename = _filename(repo)
    try:
        cachefile = repo.vfs(filename, 'w', atomictemp=True)
    except (OSError, IOError):
        return

    ui.log('tagscache', 'writing .hg/%s with %d tags\n',
           filename, len(cachetags))

    if valid[2]:
        cachefile.write('%d %s %s\n' % (valid[0], hex(valid[1]), hex(valid[2])))
    else:
        cachefile.write('%d %s\n' % (valid[0], hex(valid[1])))

    # Tag names in the cache are in UTF-8 -- which is the whole reason
    # we keep them in UTF-8 throughout this module. If we converted
    # them to local encoding on input, we would lose info writing them to
    # the cache.
    for (name, (node, hist)) in sorted(cachetags.iteritems()):
        for n in hist:
            cachefile.write("%s %s\n" % (hex(n), name))
        cachefile.write("%s %s\n" % (hex(node), name))

    try:
        cachefile.close()
    except (OSError, IOError):
        pass

_fnodescachefile = 'cache/hgtagsfnodes1'
_fnodesrecsize = 4 + 20 # changeset fragment + filenode
_fnodesmissingrec = '\xff' * 24
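
# Editor's sketch of one record under the constants above (values passed
# in are invented by the caller): 24 bytes per changelog revision, a
# 4-byte node-prefix check followed by the 20-byte .hgtags filenode.
def _examplefnodesrecord(rev, node, fnode):
    record = node[0:4] + fnode      # len(record) == _fnodesrecsize
    offset = rev * _fnodesrecsize   # where rev's record lives in the cache
    return offset, record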

class hgtagsfnodescache(object):
    """Persistent cache mapping revisions to .hgtags filenodes.

    The cache is an array of records. Each item in the array corresponds to
    a changelog revision. Values in the array contain the first 4 bytes of
    the node hash and the 20-byte .hgtags filenode for that revision.

    The first 4 bytes are present as a form of verification. Repository
    stripping and rewriting may change the node at a numeric revision in the
    changelog. The changeset fragment serves as a verifier to detect
    rewriting. This logic is shared with the rev branch cache (see
    branchmap.py).

    The instance holds in memory the full cache content but entries are
    only parsed on read.

    Instances behave like lists. ``c[i]`` works where i is a rev or
    changeset node. Missing indexes are populated automatically on access.
    """
    def __init__(self, repo):
        assert repo.filtername is None

        self._repo = repo

        # Only for reporting purposes.
        self.lookupcount = 0
        self.hitcount = 0

        try:
            data = repo.vfs.read(_fnodescachefile)
        except (OSError, IOError):
            data = ""
        self._raw = bytearray(data)

        # The end state of self._raw is a bytearray that is of the exact
        # length required to hold a record for every revision in the
        # repository. We truncate or extend the array as necessary.
        # self._dirtyoffset is defined to be the start offset at which we
        # need to write the output file. This offset is also adjusted when
        # new entries are calculated for array members.
        cllen = len(repo.changelog)
        wantedlen = cllen * _fnodesrecsize
        rawlen = len(self._raw)

        self._dirtyoffset = None

        if rawlen < wantedlen:
            self._dirtyoffset = rawlen
            self._raw.extend('\xff' * (wantedlen - rawlen))
        elif rawlen > wantedlen:
            # There's no easy way to truncate array instances. This seems
            # slightly less evil than copying a potentially large array slice.
            for i in range(rawlen - wantedlen):
                self._raw.pop()
            self._dirtyoffset = len(self._raw)

    def getfnode(self, node, computemissing=True):
        """Obtain the filenode of the .hgtags file at a specified revision.

        If the value is in the cache, the entry will be validated and returned.
        Otherwise, the filenode will be computed and returned unless
        "computemissing" is False, in which case None will be returned without
        any potentially expensive computation being performed.

        If an .hgtags file does not exist at the specified revision, nullid is
        returned.
        """
        ctx = self._repo[node]
        rev = ctx.rev()

        self.lookupcount += 1

        offset = rev * _fnodesrecsize
        record = '%s' % self._raw[offset:offset + _fnodesrecsize]
        properprefix = node[0:4]

        # Validate and return existing entry.
        if record != _fnodesmissingrec:
            fileprefix = record[0:4]

            if fileprefix == properprefix:
                self.hitcount += 1
                return record[4:]

            # Fall through.

        # If we get here, the entry is either missing or invalid.

        if not computemissing:
            return None

        # Populate missing entry.
        try:
            fnode = ctx.filenode('.hgtags')
        except error.LookupError:
            # No .hgtags file on this revision.
            fnode = nullid

        self._writeentry(offset, properprefix, fnode)
        return fnode

    def setfnode(self, node, fnode):
        """Set the .hgtags filenode for a given changeset."""
        assert len(fnode) == 20
        ctx = self._repo[node]

        # Do a lookup first to avoid writing if nothing has changed.
        if self.getfnode(ctx.node(), computemissing=False) == fnode:
            return

        self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)

    def _writeentry(self, offset, prefix, fnode):
        entry = bytearray(prefix + fnode)
        self._raw[offset:offset + _fnodesrecsize] = entry
        # self._dirtyoffset could be None.
        self._dirtyoffset = min(self._dirtyoffset, offset) or 0

    def write(self):
        """Perform all necessary writes to the cache file.

        This may no-op if no writes are needed or if a write lock could
        not be obtained.
        """
        if self._dirtyoffset is None:
            return

        data = self._raw[self._dirtyoffset:]
        if not data:
            return

        repo = self._repo

        try:
            lock = repo.wlock(wait=False)
        except error.LockError:
            repo.ui.log('tagscache',
                        'not writing .hg/%s because lock cannot be acquired\n' %
                        (_fnodescachefile))
            return

        try:
            f = repo.vfs.open(_fnodescachefile, 'ab')
            try:
                # if the file has been truncated
                actualoffset = f.tell()
                if actualoffset < self._dirtyoffset:
                    self._dirtyoffset = actualoffset
                    data = self._raw[self._dirtyoffset:]
                f.seek(self._dirtyoffset)
                f.truncate()
                repo.ui.log('tagscache',
                            'writing %d bytes to %s\n' % (
                            len(data), _fnodescachefile))
                f.write(data)
                self._dirtyoffset = None
            finally:
                f.close()
        except (IOError, OSError) as inst:
            repo.ui.log('tagscache',
                        "couldn't write %s: %s\n" % (
                        _fnodescachefile, inst))
        finally:
            lock.release()
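
# Editor's usage sketch (hypothetical): entries are computed on demand and
# only flushed to disk by write(), which silently no-ops if the write lock
# is unavailable.
def _examplefnodescache(repo, head):
    cache = hgtagsfnodescache(repo.unfiltered())
    fnode = cache.getfnode(head)   # nullid when the rev has no .hgtags
    cache.write()                  # persists any newly computed records
    return fnode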