py3: use bytearray() instead of array('c', ...) constructions...
Augie Fackler
r31346:2a18e9e6 default
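This commit applies one mechanical substitution across branchmap.py, bundlerepo.py, and manifest.py: array('c', ...) buffers become bytearray objects, since the 'c' typecode no longer exists on Python 3 while bytearray is a mutable byte buffer on both Python 2 and 3. A minimal standalone sketch of the before/after idiom (illustration only, not part of the diff):

    # Python 2-only idiom being removed (the 'c' typecode is gone in Python 3):
    #     import array
    #     buf = array.array('c')
    #     buf.fromstring(data)    # load raw bytes
    #     text = buf.tostring()   # dump raw bytes
    # Portable replacement used throughout this commit:
    data = b'some raw bytes'
    buf = bytearray()
    buf[:] = data                 # load raw bytes (cf. _rbcrevs[:] = data below)
    text = bytes(buf)             # dump raw bytes (str on Python 2)
    assert text == data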
@@ -1,522 +1,520 b''
1 1 # branchmap.py - logic to compute, maintain and store branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import struct
12 12
13 13 from .node import (
14 14 bin,
15 15 hex,
16 16 nullid,
17 17 nullrev,
18 18 )
19 19 from . import (
20 20 encoding,
21 21 error,
22 22 scmutil,
23 23 util,
24 24 )
25 25
26 26 array = array.array
27 27 calcsize = struct.calcsize
28 28 pack = struct.pack
29 29 unpack = struct.unpack
30 30
31 31 def _filename(repo):
32 32 """name of a branchcache file for a given repo or repoview"""
33 33 filename = "cache/branch2"
34 34 if repo.filtername:
35 35 filename = '%s-%s' % (filename, repo.filtername)
36 36 return filename
37 37
38 38 def read(repo):
39 39 try:
40 40 f = repo.vfs(_filename(repo))
41 41 lines = f.read().split('\n')
42 42 f.close()
43 43 except (IOError, OSError):
44 44 return None
45 45
46 46 try:
47 47 cachekey = lines.pop(0).split(" ", 2)
48 48 last, lrev = cachekey[:2]
49 49 last, lrev = bin(last), int(lrev)
50 50 filteredhash = None
51 51 if len(cachekey) > 2:
52 52 filteredhash = bin(cachekey[2])
53 53 partial = branchcache(tipnode=last, tiprev=lrev,
54 54 filteredhash=filteredhash)
55 55 if not partial.validfor(repo):
56 56 # invalidate the cache
57 57 raise ValueError('tip differs')
58 58 cl = repo.changelog
59 59 for l in lines:
60 60 if not l:
61 61 continue
62 62 node, state, label = l.split(" ", 2)
63 63 if state not in 'oc':
64 64 raise ValueError('invalid branch state')
65 65 label = encoding.tolocal(label.strip())
66 66 node = bin(node)
67 67 if not cl.hasnode(node):
68 68 raise ValueError('node %s does not exist' % hex(node))
69 69 partial.setdefault(label, []).append(node)
70 70 if state == 'c':
71 71 partial._closednodes.add(node)
72 72 except KeyboardInterrupt:
73 73 raise
74 74 except Exception as inst:
75 75 if repo.ui.debugflag:
76 76 msg = 'invalid branchheads cache'
77 77 if repo.filtername is not None:
78 78 msg += ' (%s)' % repo.filtername
79 79 msg += ': %s\n'
80 80 repo.ui.debug(msg % inst)
81 81 partial = None
82 82 return partial
83 83
84 84 ### Nearest subset relation
85 85 # Nearest subset of filter X is a filter Y so that:
86 86 # * Y is included in X,
87 87 # * X - Y is as small as possible.
88 88 # This creates an ordering used for branchmap purposes.
89 89 # The ordering may be partial.
90 90 subsettable = {None: 'visible',
91 91 'visible': 'served',
92 92 'served': 'immutable',
93 93 'immutable': 'base'}
94 94
95 95 def updatecache(repo):
96 96 cl = repo.changelog
97 97 filtername = repo.filtername
98 98 partial = repo._branchcaches.get(filtername)
99 99
100 100 revs = []
101 101 if partial is None or not partial.validfor(repo):
102 102 partial = read(repo)
103 103 if partial is None:
104 104 subsetname = subsettable.get(filtername)
105 105 if subsetname is None:
106 106 partial = branchcache()
107 107 else:
108 108 subset = repo.filtered(subsetname)
109 109 partial = subset.branchmap().copy()
110 110 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
111 111 revs.extend(r for r in extrarevs if r <= partial.tiprev)
112 112 revs.extend(cl.revs(start=partial.tiprev + 1))
113 113 if revs:
114 114 partial.update(repo, revs)
115 115 partial.write(repo)
116 116
117 117 assert partial.validfor(repo), filtername
118 118 repo._branchcaches[repo.filtername] = partial
119 119
120 120 def replacecache(repo, bm):
121 121 """Replace the branchmap cache for a repo with a branch mapping.
122 122
123 123 This is likely only called during clone with a branch map from a remote.
124 124 """
125 125 rbheads = []
126 126 closed = []
127 127 for bheads in bm.itervalues():
128 128 rbheads.extend(bheads)
129 129 for h in bheads:
130 130 r = repo.changelog.rev(h)
131 131 b, c = repo.changelog.branchinfo(r)
132 132 if c:
133 133 closed.append(h)
134 134
135 135 if rbheads:
136 136 rtiprev = max((int(repo.changelog.rev(node))
137 137 for node in rbheads))
138 138 cache = branchcache(bm,
139 139 repo[rtiprev].node(),
140 140 rtiprev,
141 141 closednodes=closed)
142 142
143 143 # Try to stick it as low as possible
144 144 # filters above served are unlikely to be fetched from a clone
145 145 for candidate in ('base', 'immutable', 'served'):
146 146 rview = repo.filtered(candidate)
147 147 if cache.validfor(rview):
148 148 repo._branchcaches[candidate] = cache
149 149 cache.write(rview)
150 150 break
151 151
152 152 class branchcache(dict):
153 153 """A dict like object that hold branches heads cache.
154 154
155 155 This cache is used to avoid costly computations to determine all the
156 156 branch heads of a repo.
157 157
158 158 The cache is serialized on disk in the following format:
159 159
160 160 <tip hex node> <tip rev number> [optional filtered repo hex hash]
161 161 <branch head hex node> <open/closed state> <branch name>
162 162 <branch head hex node> <open/closed state> <branch name>
163 163 ...
164 164
165 165 The first line is used to check if the cache is still valid. If the
166 166 branch cache is for a filtered repo view, an optional third hash is
167 167 included that hashes the hashes of all filtered revisions.
168 168
169 169 The open/closed state is represented by a single letter 'o' or 'c'.
170 170 This field can be used to avoid changelog reads when determining if a
171 171 branch head closes a branch or not.
172 172 """
173 173
174 174 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
175 175 filteredhash=None, closednodes=None):
176 176 super(branchcache, self).__init__(entries)
177 177 self.tipnode = tipnode
178 178 self.tiprev = tiprev
179 179 self.filteredhash = filteredhash
180 180 # closednodes is a set of nodes that close their branch. If the branch
181 181 # cache has been updated, it may contain nodes that are no longer
182 182 # heads.
183 183 if closednodes is None:
184 184 self._closednodes = set()
185 185 else:
186 186 self._closednodes = closednodes
187 187
188 188 def validfor(self, repo):
189 189 """Is the cache content valid regarding a repo
190 190
191 191 - False when cached tipnode is unknown or if we detect a strip.
192 192 - True when cache is up to date or a subset of current repo."""
193 193 try:
194 194 return ((self.tipnode == repo.changelog.node(self.tiprev))
195 195 and (self.filteredhash == \
196 196 scmutil.filteredhash(repo, self.tiprev)))
197 197 except IndexError:
198 198 return False
199 199
200 200 def _branchtip(self, heads):
201 201 '''Return a tuple with the last open head in heads and False;
202 202 otherwise return the last closed head and True.'''
203 203 tip = heads[-1]
204 204 closed = True
205 205 for h in reversed(heads):
206 206 if h not in self._closednodes:
207 207 tip = h
208 208 closed = False
209 209 break
210 210 return tip, closed
211 211
212 212 def branchtip(self, branch):
213 213 '''Return the tipmost open head on branch, otherwise return the
214 214 tipmost closed head on branch.
215 215 Raise KeyError for unknown branch.'''
216 216 return self._branchtip(self[branch])[0]
217 217
218 218 def branchheads(self, branch, closed=False):
219 219 heads = self[branch]
220 220 if not closed:
221 221 heads = [h for h in heads if h not in self._closednodes]
222 222 return heads
223 223
224 224 def iterbranches(self):
225 225 for bn, heads in self.iteritems():
226 226 yield (bn, heads) + self._branchtip(heads)
227 227
228 228 def copy(self):
229 229 """return an deep copy of the branchcache object"""
230 230 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
231 231 self._closednodes)
232 232
233 233 def write(self, repo):
234 234 try:
235 235 f = repo.vfs(_filename(repo), "w", atomictemp=True)
236 236 cachekey = [hex(self.tipnode), str(self.tiprev)]
237 237 if self.filteredhash is not None:
238 238 cachekey.append(hex(self.filteredhash))
239 239 f.write(" ".join(cachekey) + '\n')
240 240 nodecount = 0
241 241 for label, nodes in sorted(self.iteritems()):
242 242 for node in nodes:
243 243 nodecount += 1
244 244 if node in self._closednodes:
245 245 state = 'c'
246 246 else:
247 247 state = 'o'
248 248 f.write("%s %s %s\n" % (hex(node), state,
249 249 encoding.fromlocal(label)))
250 250 f.close()
251 251 repo.ui.log('branchcache',
252 252 'wrote %s branch cache with %d labels and %d nodes\n',
253 253 repo.filtername, len(self), nodecount)
254 254 except (IOError, OSError, error.Abort) as inst:
255 255 repo.ui.debug("couldn't write branch cache: %s\n" % inst)
256 256 # Abort may be raised by a read-only opener
257 257 pass
258 258
259 259 def update(self, repo, revgen):
260 260 """Given a branchhead cache, self, that may have extra nodes or be
261 261 missing heads, and a generator of nodes that are strictly a superset of
262 262 the missing heads, this function updates self to be correct.
263 263 """
264 264 starttime = util.timer()
265 265 cl = repo.changelog
266 266 # collect new branch entries
267 267 newbranches = {}
268 268 getbranchinfo = repo.revbranchcache().branchinfo
269 269 for r in revgen:
270 270 branch, closesbranch = getbranchinfo(r)
271 271 newbranches.setdefault(branch, []).append(r)
272 272 if closesbranch:
273 273 self._closednodes.add(cl.node(r))
274 274
275 275 # fetch current topological heads to speed up filtering
276 276 topoheads = set(cl.headrevs())
277 277
278 278 # if older branchheads are reachable from new ones, they aren't
279 279 # really branchheads. Note checking parents is insufficient:
280 280 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
281 281 for branch, newheadrevs in newbranches.iteritems():
282 282 bheads = self.setdefault(branch, [])
283 283 bheadset = set(cl.rev(node) for node in bheads)
284 284
285 285 # This has been tested True on all internal usages of this function.
286 286 # Run it again in case of doubt:
287 287 # assert not (set(bheadrevs) & set(newheadrevs))
288 288 newheadrevs.sort()
289 289 bheadset.update(newheadrevs)
290 290
291 291 # This prunes out two kinds of heads - heads that are superseded by
292 292 # a head in newheadrevs, and newheadrevs that are not heads because
293 293 # an existing head is their descendant.
294 294 uncertain = bheadset - topoheads
295 295 if uncertain:
296 296 floorrev = min(uncertain)
297 297 ancestors = set(cl.ancestors(newheadrevs, floorrev))
298 298 bheadset -= ancestors
299 299 bheadrevs = sorted(bheadset)
300 300 self[branch] = [cl.node(rev) for rev in bheadrevs]
301 301 tiprev = bheadrevs[-1]
302 302 if tiprev > self.tiprev:
303 303 self.tipnode = cl.node(tiprev)
304 304 self.tiprev = tiprev
305 305
306 306 if not self.validfor(repo):
307 307 # cache key is not valid anymore
308 308 self.tipnode = nullid
309 309 self.tiprev = nullrev
310 310 for heads in self.values():
311 311 tiprev = max(cl.rev(node) for node in heads)
312 312 if tiprev > self.tiprev:
313 313 self.tipnode = cl.node(tiprev)
314 314 self.tiprev = tiprev
315 315 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
316 316
317 317 duration = util.timer() - starttime
318 318 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
319 319 repo.filtername, duration)
320 320
321 321 # Revision branch info cache
322 322
323 323 _rbcversion = '-v1'
324 324 _rbcnames = 'cache/rbc-names' + _rbcversion
325 325 _rbcrevs = 'cache/rbc-revs' + _rbcversion
326 326 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
327 327 _rbcrecfmt = '>4sI'
328 328 _rbcrecsize = calcsize(_rbcrecfmt)
329 329 _rbcnodelen = 4
330 330 _rbcbranchidxmask = 0x7fffffff
331 331 _rbccloseflag = 0x80000000
332 332
333 333 class revbranchcache(object):
334 334 """Persistent cache, mapping from revision number to branch name and close.
335 335 This is a low level cache, independent of filtering.
336 336
337 337 Branch names are stored in rbc-names in internal encoding separated by 0.
338 338 rbc-names is append-only, and each branch name is only stored once and will
339 339 thus have a unique index.
340 340
341 341 The branch info for each revision is stored in rbc-revs as constant size
342 342 records. The whole file is read into memory, but it is only 'parsed' on
343 343 demand. The file is usually append-only but will be truncated if repo
344 344 modification is detected.
345 345 The record for each revision contains the first 4 bytes of the
346 346 corresponding node hash, and the record is only used if it still matches.
347 347 Even a completely trashed rbc-revs will thus still give the right result
348 348 while converging towards full recovery ... assuming no incorrectly matching
349 349 node hashes.
350 350 The record also contains 4 bytes where 31 bits contain the index of the
351 351 branch and the last bit indicates that it is a branch close commit.
352 352 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
353 353 and will grow with it but be 1/8th of its size.
354 354 """
355 355
356 356 def __init__(self, repo, readonly=True):
357 357 assert repo.filtername is None
358 358 self._repo = repo
359 359 self._names = [] # branch names in local encoding with static index
360 self._rbcrevs = array('c') # structs of type _rbcrecfmt
360 self._rbcrevs = bytearray()
361 361 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
362 362 try:
363 363 bndata = repo.vfs.read(_rbcnames)
364 364 self._rbcsnameslen = len(bndata) # for verification before writing
365 365 self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
366 366 except (IOError, OSError):
367 367 if readonly:
368 368 # don't try to use cache - fall back to the slow path
369 369 self.branchinfo = self._branchinfo
370 370
371 371 if self._names:
372 372 try:
373 373 data = repo.vfs.read(_rbcrevs)
374 self._rbcrevs.fromstring(data)
374 self._rbcrevs[:] = data
375 375 except (IOError, OSError) as inst:
376 376 repo.ui.debug("couldn't read revision branch cache: %s\n" %
377 377 inst)
378 378 # remember number of good records on disk
379 379 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
380 380 len(repo.changelog))
381 381 if self._rbcrevslen == 0:
382 382 self._names = []
383 383 self._rbcnamescount = len(self._names) # number of names read at
384 384 # _rbcsnameslen
385 385 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
386 386
387 387 def _clear(self):
388 388 self._rbcsnameslen = 0
389 389 del self._names[:]
390 390 self._rbcnamescount = 0
391 391 self._namesreverse.clear()
392 392 self._rbcrevslen = len(self._repo.changelog)
393 self._rbcrevs = array('c')
394 self._rbcrevs.fromstring('\0' * (self._rbcrevslen * _rbcrecsize))
393 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
395 394
396 395 def branchinfo(self, rev):
397 396 """Return branch name and close flag for rev, using and updating
398 397 persistent cache."""
399 398 changelog = self._repo.changelog
400 399 rbcrevidx = rev * _rbcrecsize
401 400
402 401 # avoid negative index, changelog.read(nullrev) is fast without cache
403 402 if rev == nullrev:
404 403 return changelog.branchinfo(rev)
405 404
406 405 # if requested rev isn't allocated, grow and cache the rev info
407 406 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
408 407 return self._branchinfo(rev)
409 408
410 409 # fast path: extract data from cache, use it if node is matching
411 410 reponode = changelog.node(rev)[:_rbcnodelen]
412 411 cachenode, branchidx = unpack(
413 412 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
414 413 close = bool(branchidx & _rbccloseflag)
415 414 if close:
416 415 branchidx &= _rbcbranchidxmask
417 416 if cachenode == '\0\0\0\0':
418 417 pass
419 418 elif cachenode == reponode:
420 419 try:
421 420 return self._names[branchidx], close
422 421 except IndexError:
423 422 # recover from invalid reference to unknown branch
424 423 self._repo.ui.debug("referenced branch names not found"
425 424 " - rebuilding revision branch cache from scratch\n")
426 425 self._clear()
427 426 else:
428 427 # rev/node map has changed, invalidate the cache from here up
429 428 self._repo.ui.debug("history modification detected - truncating "
430 429 "revision branch cache to revision %s\n" % rev)
431 430 truncate = rbcrevidx + _rbcrecsize
432 431 del self._rbcrevs[truncate:]
433 432 self._rbcrevslen = min(self._rbcrevslen, truncate)
434 433
435 434 # fall back to slow path and make sure it will be written to disk
436 435 return self._branchinfo(rev)
437 436
438 437 def _branchinfo(self, rev):
439 438 """Retrieve branch info from changelog and update _rbcrevs"""
440 439 changelog = self._repo.changelog
441 440 b, close = changelog.branchinfo(rev)
442 441 if b in self._namesreverse:
443 442 branchidx = self._namesreverse[b]
444 443 else:
445 444 branchidx = len(self._names)
446 445 self._names.append(b)
447 446 self._namesreverse[b] = branchidx
448 447 reponode = changelog.node(rev)
449 448 if close:
450 449 branchidx |= _rbccloseflag
451 450 self._setcachedata(rev, reponode, branchidx)
452 451 return b, close
453 452
454 453 def _setcachedata(self, rev, node, branchidx):
455 454 """Writes the node's branch data to the in-memory cache data."""
456 455 rbcrevidx = rev * _rbcrecsize
457 rec = array('c')
458 rec.fromstring(pack(_rbcrecfmt, node, branchidx))
456 rec = bytearray(pack(_rbcrecfmt, node, branchidx))
459 457 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
460 458 self._rbcrevs.extend('\0' *
461 459 (len(self._repo.changelog) * _rbcrecsize -
462 460 len(self._rbcrevs)))
463 461 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
464 462 self._rbcrevslen = min(self._rbcrevslen, rev)
465 463
466 464 tr = self._repo.currenttransaction()
467 465 if tr:
468 466 tr.addfinalize('write-revbranchcache', self.write)
469 467
470 468 def write(self, tr=None):
471 469 """Save branch cache if it is dirty."""
472 470 repo = self._repo
473 471 wlock = None
474 472 step = ''
475 473 try:
476 474 if self._rbcnamescount < len(self._names):
477 475 step = ' names'
478 476 wlock = repo.wlock(wait=False)
479 477 if self._rbcnamescount != 0:
480 478 f = repo.vfs.open(_rbcnames, 'ab')
481 479 if f.tell() == self._rbcsnameslen:
482 480 f.write('\0')
483 481 else:
484 482 f.close()
485 483 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
486 484 self._rbcnamescount = 0
487 485 self._rbcrevslen = 0
488 486 if self._rbcnamescount == 0:
489 487 # before rewriting names, make sure references are removed
490 488 repo.vfs.unlinkpath(_rbcrevs, ignoremissing=True)
491 489 f = repo.vfs.open(_rbcnames, 'wb')
492 490 f.write('\0'.join(encoding.fromlocal(b)
493 491 for b in self._names[self._rbcnamescount:]))
494 492 self._rbcsnameslen = f.tell()
495 493 f.close()
496 494 self._rbcnamescount = len(self._names)
497 495
498 496 start = self._rbcrevslen * _rbcrecsize
499 497 if start != len(self._rbcrevs):
500 498 step = ''
501 499 if wlock is None:
502 500 wlock = repo.wlock(wait=False)
503 501 revs = min(len(repo.changelog),
504 502 len(self._rbcrevs) // _rbcrecsize)
505 503 f = repo.vfs.open(_rbcrevs, 'ab')
506 504 if f.tell() != start:
507 505 repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
508 506 f.seek(start)
509 507 if f.tell() != start:
510 508 start = 0
511 509 f.seek(start)
512 510 f.truncate()
513 511 end = revs * _rbcrecsize
514 512 f.write(self._rbcrevs[start:end])
515 513 f.close()
516 514 self._rbcrevslen = revs
517 515 except (IOError, OSError, error.Abort, error.LockError) as inst:
518 516 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
519 517 % (step, inst))
520 518 finally:
521 519 if wlock is not None:
522 520 wlock.release()
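For orientation, the rbc-revs record format documented above ('>4sI': a 4-byte node-hash prefix plus a 4-byte big-endian unsigned integer whose top bit marks a branch-closing commit) round-trips as sketched below; node4 and branchidx are made-up example values:

    import struct

    _rbcrecfmt = '>4sI'             # 4-byte node prefix + 4-byte unsigned int
    _rbccloseflag = 0x80000000      # top bit: commit closes its branch
    _rbcbranchidxmask = 0x7fffffff  # low 31 bits: index into rbc-names

    node4 = b'\xde\xad\xbe\xef'     # first 4 bytes of a node hash (example)
    branchidx = 5                   # position of the branch name in rbc-names
    rec = struct.pack(_rbcrecfmt, node4, branchidx | _rbccloseflag)
    assert len(rec) == struct.calcsize(_rbcrecfmt) == 8

    prefix, value = struct.unpack(_rbcrecfmt, rec)
    assert prefix == node4
    assert value & _rbcbranchidxmask == 5
    assert bool(value & _rbccloseflag)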
@@ -1,557 +1,557 b''
1 1 # bundlerepo.py - repository class for viewing uncompressed bundles
2 2 #
3 3 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Repository class for viewing uncompressed bundles.
9 9
10 10 This provides a read-only repository interface to bundles as if they
11 11 were part of the actual repository.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import os
17 17 import shutil
18 18 import tempfile
19 19
20 20 from .i18n import _
21 21 from .node import nullid
22 22
23 23 from . import (
24 24 bundle2,
25 25 changegroup,
26 26 changelog,
27 27 cmdutil,
28 28 discovery,
29 29 error,
30 30 exchange,
31 31 filelog,
32 32 localrepo,
33 33 manifest,
34 34 mdiff,
35 35 node as nodemod,
36 36 pathutil,
37 37 phases,
38 38 pycompat,
39 39 revlog,
40 40 util,
41 41 vfs as vfsmod,
42 42 )
43 43
44 44 class bundlerevlog(revlog.revlog):
45 45 def __init__(self, opener, indexfile, bundle, linkmapper):
46 46 # How it works:
47 47 # To retrieve a revision, we need to know the offset of the revision in
48 48 # the bundle (an unbundle object). We store this offset in the index
49 49 # (start). The base of the delta is stored in the base field.
50 50 #
51 51 # To differentiate a rev in the bundle from a rev in the revlog, we
52 52 # check revision against repotiprev.
53 53 opener = vfsmod.readonlyvfs(opener)
54 54 revlog.revlog.__init__(self, opener, indexfile)
55 55 self.bundle = bundle
56 56 n = len(self)
57 57 self.repotiprev = n - 1
58 58 chain = None
59 59 self.bundlerevs = set() # used by 'bundle()' revset expression
60 60 getchunk = lambda: bundle.deltachunk(chain)
61 61 for chunkdata in iter(getchunk, {}):
62 62 node = chunkdata['node']
63 63 p1 = chunkdata['p1']
64 64 p2 = chunkdata['p2']
65 65 cs = chunkdata['cs']
66 66 deltabase = chunkdata['deltabase']
67 67 delta = chunkdata['delta']
68 68
69 69 size = len(delta)
70 70 start = bundle.tell() - size
71 71
72 72 link = linkmapper(cs)
73 73 if node in self.nodemap:
74 74 # this can happen if two branches make the same change
75 75 chain = node
76 76 self.bundlerevs.add(self.nodemap[node])
77 77 continue
78 78
79 79 for p in (p1, p2):
80 80 if p not in self.nodemap:
81 81 raise error.LookupError(p, self.indexfile,
82 82 _("unknown parent"))
83 83
84 84 if deltabase not in self.nodemap:
85 85 raise error.LookupError(deltabase, self.indexfile,
86 86 _('unknown delta base'))
87 87
88 88 baserev = self.rev(deltabase)
89 89 # start, size, full unc. size, base (unused), link, p1, p2, node
90 90 e = (revlog.offset_type(start, 0), size, -1, baserev, link,
91 91 self.rev(p1), self.rev(p2), node)
92 92 self.index.insert(-1, e)
93 93 self.nodemap[node] = n
94 94 self.bundlerevs.add(n)
95 95 chain = node
96 96 n += 1
97 97
98 98 def _chunk(self, rev):
99 99 # Warning: in case of bundle, the diff is against what we stored as
100 100 # delta base, not against rev - 1
101 101 # XXX: could use some caching
102 102 if rev <= self.repotiprev:
103 103 return revlog.revlog._chunk(self, rev)
104 104 self.bundle.seek(self.start(rev))
105 105 return self.bundle.read(self.length(rev))
106 106
107 107 def revdiff(self, rev1, rev2):
108 108 """return or calculate a delta between two revisions"""
109 109 if rev1 > self.repotiprev and rev2 > self.repotiprev:
110 110 # hot path for bundle
111 111 revb = self.index[rev2][3]
112 112 if revb == rev1:
113 113 return self._chunk(rev2)
114 114 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
115 115 return revlog.revlog.revdiff(self, rev1, rev2)
116 116
117 117 return mdiff.textdiff(self.revision(self.node(rev1)),
118 118 self.revision(self.node(rev2)))
119 119
120 120 def revision(self, nodeorrev, raw=False):
121 121 """return an uncompressed revision of a given node or revision
122 122 number.
123 123 """
124 124 if isinstance(nodeorrev, int):
125 125 rev = nodeorrev
126 126 node = self.node(rev)
127 127 else:
128 128 node = nodeorrev
129 129 rev = self.rev(node)
130 130
131 131 if node == nullid:
132 132 return ""
133 133
134 134 text = None
135 135 chain = []
136 136 iterrev = rev
137 137 # reconstruct the revision if it is from a changegroup
138 138 while iterrev > self.repotiprev:
139 139 if self._cache and self._cache[1] == iterrev:
140 140 text = self._cache[2]
141 141 break
142 142 chain.append(iterrev)
143 143 iterrev = self.index[iterrev][3]
144 144 if text is None:
145 145 text = self.baserevision(iterrev)
146 146
147 147 while chain:
148 148 delta = self._chunk(chain.pop())
149 149 text = mdiff.patches(text, [delta])
150 150
151 151 text, validatehash = self._processflags(text, self.flags(rev),
152 152 'read', raw=raw)
153 153 if validatehash:
154 154 self.checkhash(text, node, rev=rev)
155 155 self._cache = (node, rev, text)
156 156 return text
157 157
158 158 def baserevision(self, nodeorrev):
159 159 # Revlog subclasses may override 'revision' method to modify format of
160 160 # content retrieved from revlog. To use bundlerevlog with such a class one
161 161 # needs to override 'baserevision' and make a more specific call here.
162 162 return revlog.revlog.revision(self, nodeorrev)
163 163
164 164 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
165 165 raise NotImplementedError
166 166 def addgroup(self, revs, linkmapper, transaction):
167 167 raise NotImplementedError
168 168 def strip(self, rev, minlink):
169 169 raise NotImplementedError
170 170 def checksize(self):
171 171 raise NotImplementedError
172 172
173 173 class bundlechangelog(bundlerevlog, changelog.changelog):
174 174 def __init__(self, opener, bundle):
175 175 changelog.changelog.__init__(self, opener)
176 176 linkmapper = lambda x: x
177 177 bundlerevlog.__init__(self, opener, self.indexfile, bundle,
178 178 linkmapper)
179 179
180 180 def baserevision(self, nodeorrev):
181 181 # Although changelog doesn't override 'revision' method, some extensions
182 182 # may replace this class with another that does. Same story with
183 183 # manifest and filelog classes.
184 184
185 185 # This bypasses filtering on changelog.node() and rev() because we need
186 186 # revision text of the bundle base even if it is hidden.
187 187 oldfilter = self.filteredrevs
188 188 try:
189 189 self.filteredrevs = ()
190 190 return changelog.changelog.revision(self, nodeorrev)
191 191 finally:
192 192 self.filteredrevs = oldfilter
193 193
194 194 class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
195 195 def __init__(self, opener, bundle, linkmapper, dirlogstarts=None, dir=''):
196 196 manifest.manifestrevlog.__init__(self, opener, dir=dir)
197 197 bundlerevlog.__init__(self, opener, self.indexfile, bundle,
198 198 linkmapper)
199 199 if dirlogstarts is None:
200 200 dirlogstarts = {}
201 201 if self.bundle.version == "03":
202 202 dirlogstarts = _getfilestarts(self.bundle)
203 203 self._dirlogstarts = dirlogstarts
204 204 self._linkmapper = linkmapper
205 205
206 206 def baserevision(self, nodeorrev):
207 207 node = nodeorrev
208 208 if isinstance(node, int):
209 209 node = self.node(node)
210 210
211 211 if node in self.fulltextcache:
212 result = self.fulltextcache[node].tostring()
212 result = '%s' % self.fulltextcache[node]
213 213 else:
214 214 result = manifest.manifestrevlog.revision(self, nodeorrev)
215 215 return result
216 216
217 217 def dirlog(self, d):
218 218 if d in self._dirlogstarts:
219 219 self.bundle.seek(self._dirlogstarts[d])
220 220 return bundlemanifest(
221 221 self.opener, self.bundle, self._linkmapper,
222 222 self._dirlogstarts, dir=d)
223 223 return super(bundlemanifest, self).dirlog(d)
224 224
225 225 class bundlefilelog(bundlerevlog, filelog.filelog):
226 226 def __init__(self, opener, path, bundle, linkmapper):
227 227 filelog.filelog.__init__(self, opener, path)
228 228 bundlerevlog.__init__(self, opener, self.indexfile, bundle,
229 229 linkmapper)
230 230
231 231 def baserevision(self, nodeorrev):
232 232 return filelog.filelog.revision(self, nodeorrev)
233 233
234 234 class bundlepeer(localrepo.localpeer):
235 235 def canpush(self):
236 236 return False
237 237
238 238 class bundlephasecache(phases.phasecache):
239 239 def __init__(self, *args, **kwargs):
240 240 super(bundlephasecache, self).__init__(*args, **kwargs)
241 241 if util.safehasattr(self, 'opener'):
242 242 self.opener = vfsmod.readonlyvfs(self.opener)
243 243
244 244 def write(self):
245 245 raise NotImplementedError
246 246
247 247 def _write(self, fp):
248 248 raise NotImplementedError
249 249
250 250 def _updateroots(self, phase, newroots, tr):
251 251 self.phaseroots[phase] = newroots
252 252 self.invalidate()
253 253 self.dirty = True
254 254
255 255 def _getfilestarts(bundle):
256 256 bundlefilespos = {}
257 257 for chunkdata in iter(bundle.filelogheader, {}):
258 258 fname = chunkdata['filename']
259 259 bundlefilespos[fname] = bundle.tell()
260 260 for chunk in iter(lambda: bundle.deltachunk(None), {}):
261 261 pass
262 262 return bundlefilespos
263 263
264 264 class bundlerepository(localrepo.localrepository):
265 265 def __init__(self, ui, path, bundlename):
266 266 def _writetempbundle(read, suffix, header=''):
267 267 """Write a temporary file to disk
268 268
269 269 This is a closure because we need to make sure this is tracked by
270 270 self.tempfile for cleanup purposes."""
271 271 fdtemp, temp = self.vfs.mkstemp(prefix="hg-bundle-",
272 272 suffix=".hg10un")
273 273 self.tempfile = temp
274 274
275 275 with os.fdopen(fdtemp, pycompat.sysstr('wb')) as fptemp:
276 276 fptemp.write(header)
277 277 while True:
278 278 chunk = read(2**18)
279 279 if not chunk:
280 280 break
281 281 fptemp.write(chunk)
282 282
283 283 return self.vfs.open(self.tempfile, mode="rb")
284 284 self._tempparent = None
285 285 try:
286 286 localrepo.localrepository.__init__(self, ui, path)
287 287 except error.RepoError:
288 288 self._tempparent = tempfile.mkdtemp()
289 289 localrepo.instance(ui, self._tempparent, 1)
290 290 localrepo.localrepository.__init__(self, ui, self._tempparent)
291 291 self.ui.setconfig('phases', 'publish', False, 'bundlerepo')
292 292
293 293 if path:
294 294 self._url = 'bundle:' + util.expandpath(path) + '+' + bundlename
295 295 else:
296 296 self._url = 'bundle:' + bundlename
297 297
298 298 self.tempfile = None
299 299 f = util.posixfile(bundlename, "rb")
300 300 self.bundlefile = self.bundle = exchange.readbundle(ui, f, bundlename)
301 301
302 302 if isinstance(self.bundle, bundle2.unbundle20):
303 303 cgstream = None
304 304 for part in self.bundle.iterparts():
305 305 if part.type == 'changegroup':
306 306 if cgstream is not None:
307 307 raise NotImplementedError("can't process "
308 308 "multiple changegroups")
309 309 cgstream = part
310 310 version = part.params.get('version', '01')
311 311 legalcgvers = changegroup.supportedincomingversions(self)
312 312 if version not in legalcgvers:
313 313 msg = _('Unsupported changegroup version: %s')
314 314 raise error.Abort(msg % version)
315 315 if self.bundle.compressed():
316 316 cgstream = _writetempbundle(part.read,
317 317 ".cg%sun" % version)
318 318
319 319 if cgstream is None:
320 320 raise error.Abort(_('No changegroups found'))
321 321 cgstream.seek(0)
322 322
323 323 self.bundle = changegroup.getunbundler(version, cgstream, 'UN')
324 324
325 325 elif self.bundle.compressed():
326 326 f = _writetempbundle(self.bundle.read, '.hg10un', header='HG10UN')
327 327 self.bundlefile = self.bundle = exchange.readbundle(ui, f,
328 328 bundlename,
329 329 self.vfs)
330 330
331 331 # dict with the mapping 'filename' -> position in the bundle
332 332 self.bundlefilespos = {}
333 333
334 334 self.firstnewrev = self.changelog.repotiprev + 1
335 335 phases.retractboundary(self, None, phases.draft,
336 336 [ctx.node() for ctx in self[self.firstnewrev:]])
337 337
338 338 @localrepo.unfilteredpropertycache
339 339 def _phasecache(self):
340 340 return bundlephasecache(self, self._phasedefaults)
341 341
342 342 @localrepo.unfilteredpropertycache
343 343 def changelog(self):
344 344 # consume the header if it exists
345 345 self.bundle.changelogheader()
346 346 c = bundlechangelog(self.svfs, self.bundle)
347 347 self.manstart = self.bundle.tell()
348 348 return c
349 349
350 350 def _constructmanifest(self):
351 351 self.bundle.seek(self.manstart)
352 352 # consume the header if it exists
353 353 self.bundle.manifestheader()
354 354 linkmapper = self.unfiltered().changelog.rev
355 355 m = bundlemanifest(self.svfs, self.bundle, linkmapper)
356 356 self.filestart = self.bundle.tell()
357 357 return m
358 358
359 359 @localrepo.unfilteredpropertycache
360 360 def manstart(self):
361 361 self.changelog
362 362 return self.manstart
363 363
364 364 @localrepo.unfilteredpropertycache
365 365 def filestart(self):
366 366 self.manifestlog
367 367 return self.filestart
368 368
369 369 def url(self):
370 370 return self._url
371 371
372 372 def file(self, f):
373 373 if not self.bundlefilespos:
374 374 self.bundle.seek(self.filestart)
375 375 self.bundlefilespos = _getfilestarts(self.bundle)
376 376
377 377 if f in self.bundlefilespos:
378 378 self.bundle.seek(self.bundlefilespos[f])
379 379 linkmapper = self.unfiltered().changelog.rev
380 380 return bundlefilelog(self.svfs, f, self.bundle, linkmapper)
381 381 else:
382 382 return filelog.filelog(self.svfs, f)
383 383
384 384 def close(self):
385 385 """Close assigned bundle file immediately."""
386 386 self.bundlefile.close()
387 387 if self.tempfile is not None:
388 388 self.vfs.unlink(self.tempfile)
389 389 if self._tempparent:
390 390 shutil.rmtree(self._tempparent, True)
391 391
392 392 def cancopy(self):
393 393 return False
394 394
395 395 def peer(self):
396 396 return bundlepeer(self)
397 397
398 398 def getcwd(self):
399 399 return pycompat.getcwd() # always outside the repo
400 400
401 401 # Check if parents exist in localrepo before setting
402 402 def setparents(self, p1, p2=nullid):
403 403 p1rev = self.changelog.rev(p1)
404 404 p2rev = self.changelog.rev(p2)
405 405 msg = _("setting parent to node %s that only exists in the bundle\n")
406 406 if self.changelog.repotiprev < p1rev:
407 407 self.ui.warn(msg % nodemod.hex(p1))
408 408 if self.changelog.repotiprev < p2rev:
409 409 self.ui.warn(msg % nodemod.hex(p2))
410 410 return super(bundlerepository, self).setparents(p1, p2)
411 411
412 412 def instance(ui, path, create):
413 413 if create:
414 414 raise error.Abort(_('cannot create new bundle repository'))
415 415 # internal config: bundle.mainreporoot
416 416 parentpath = ui.config("bundle", "mainreporoot", "")
417 417 if not parentpath:
418 418 # try to find the correct path to the working directory repo
419 419 parentpath = cmdutil.findrepo(pycompat.getcwd())
420 420 if parentpath is None:
421 421 parentpath = ''
422 422 if parentpath:
423 423 # Try to make the full path relative so we get a nice, short URL.
424 424 # In particular, we don't want temp dir names in test outputs.
425 425 cwd = pycompat.getcwd()
426 426 if parentpath == cwd:
427 427 parentpath = ''
428 428 else:
429 429 cwd = pathutil.normasprefix(cwd)
430 430 if parentpath.startswith(cwd):
431 431 parentpath = parentpath[len(cwd):]
432 432 u = util.url(path)
433 433 path = u.localpath()
434 434 if u.scheme == 'bundle':
435 435 s = path.split("+", 1)
436 436 if len(s) == 1:
437 437 repopath, bundlename = parentpath, s[0]
438 438 else:
439 439 repopath, bundlename = s
440 440 else:
441 441 repopath, bundlename = parentpath, path
442 442 return bundlerepository(ui, repopath, bundlename)
443 443
444 444 class bundletransactionmanager(object):
445 445 def transaction(self):
446 446 return None
447 447
448 448 def close(self):
449 449 raise NotImplementedError
450 450
451 451 def release(self):
452 452 raise NotImplementedError
453 453
454 454 def getremotechanges(ui, repo, other, onlyheads=None, bundlename=None,
455 455 force=False):
456 456 '''obtains a bundle of changes incoming from other
457 457
458 458 "onlyheads" restricts the returned changes to those reachable from the
459 459 specified heads.
460 460 "bundlename", if given, stores the bundle to this file path permanently;
461 461 otherwise it's stored to a temp file and gets deleted again when you call
462 462 the returned "cleanupfn".
463 463 "force" indicates whether to proceed on unrelated repos.
464 464
465 465 Returns a tuple (local, csets, cleanupfn):
466 466
467 467 "local" is a local repo from which to obtain the actual incoming
468 468 changesets; it is a bundlerepo for the obtained bundle when the
469 469 original "other" is remote.
470 470 "csets" lists the incoming changeset node ids.
471 471 "cleanupfn" must be called without arguments when you're done processing
472 472 the changes; it closes both the original "other" and the one returned
473 473 here.
474 474 '''
475 475 tmp = discovery.findcommonincoming(repo, other, heads=onlyheads,
476 476 force=force)
477 477 common, incoming, rheads = tmp
478 478 if not incoming:
479 479 try:
480 480 if bundlename:
481 481 os.unlink(bundlename)
482 482 except OSError:
483 483 pass
484 484 return repo, [], other.close
485 485
486 486 commonset = set(common)
487 487 rheads = [x for x in rheads if x not in commonset]
488 488
489 489 bundle = None
490 490 bundlerepo = None
491 491 localrepo = other.local()
492 492 if bundlename or not localrepo:
493 493 # create a bundle (uncompressed if other repo is not local)
494 494
495 495 # developer config: devel.legacy.exchange
496 496 legexc = ui.configlist('devel', 'legacy.exchange')
497 497 forcebundle1 = 'bundle2' not in legexc and 'bundle1' in legexc
498 498 canbundle2 = (not forcebundle1
499 499 and other.capable('getbundle')
500 500 and other.capable('bundle2'))
501 501 if canbundle2:
502 502 kwargs = {}
503 503 kwargs['common'] = common
504 504 kwargs['heads'] = rheads
505 505 kwargs['bundlecaps'] = exchange.caps20to10(repo)
506 506 kwargs['cg'] = True
507 507 b2 = other.getbundle('incoming', **kwargs)
508 508 fname = bundle = changegroup.writechunks(ui, b2._forwardchunks(),
509 509 bundlename)
510 510 else:
511 511 if other.capable('getbundle'):
512 512 cg = other.getbundle('incoming', common=common, heads=rheads)
513 513 elif onlyheads is None and not other.capable('changegroupsubset'):
514 514 # compat with older servers when pulling all remote heads
515 515 cg = other.changegroup(incoming, "incoming")
516 516 rheads = None
517 517 else:
518 518 cg = other.changegroupsubset(incoming, rheads, 'incoming')
519 519 if localrepo:
520 520 bundletype = "HG10BZ"
521 521 else:
522 522 bundletype = "HG10UN"
523 523 fname = bundle = bundle2.writebundle(ui, cg, bundlename,
524 524 bundletype)
525 525 # keep written bundle?
526 526 if bundlename:
527 527 bundle = None
528 528 if not localrepo:
529 529 # use the created uncompressed bundlerepo
530 530 localrepo = bundlerepo = bundlerepository(repo.baseui, repo.root,
531 531 fname)
532 532 # this repo contains local and other now, so filter out local again
533 533 common = repo.heads()
534 534 if localrepo:
535 535 # Part of common may be remotely filtered
536 536 # So use an unfiltered version
537 537 # The discovery process probably need cleanup to avoid that
538 538 localrepo = localrepo.unfiltered()
539 539
540 540 csets = localrepo.changelog.findmissing(common, rheads)
541 541
542 542 if bundlerepo:
543 543 reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev:]]
544 544 remotephases = other.listkeys('phases')
545 545
546 546 pullop = exchange.pulloperation(bundlerepo, other, heads=reponodes)
547 547 pullop.trmanager = bundletransactionmanager()
548 548 exchange._pullapplyphases(pullop, remotephases)
549 549
550 550 def cleanup():
551 551 if bundlerepo:
552 552 bundlerepo.close()
553 553 if bundle:
554 554 os.unlink(bundle)
555 555 other.close()
556 556
557 557 return (localrepo, csets, cleanup)
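One hunk above replaces fulltextcache[node].tostring() with '%s' % fulltextcache[node]. That works because %-formatting a bytearray yields its raw byte content on Python 2; on Python 3 it would yield the repr instead, so this particular spot still assumes Python 2 string semantics. A small check of the behavior difference:

    import sys

    buf = bytearray(b'manifest text')
    if sys.version_info[0] == 2:
        assert '%s' % buf == 'manifest text'  # byte content on Python 2
    else:
        assert '%s' % buf == repr(buf)        # "bytearray(b'...')" on Python 3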
@@ -1,1603 +1,1603 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 import array
11 10 import heapq
12 11 import os
13 12 import struct
14 13
15 14 from .i18n import _
16 15 from . import (
17 16 error,
18 17 mdiff,
19 18 parsers,
20 19 revlog,
21 20 util,
22 21 )
23 22
24 23 propertycache = util.propertycache
25 24
26 25 def _parsev1(data):
27 26 # This method does a little bit of excessive-looking
28 27 # precondition checking. This is so that the behavior of this
29 28 # class exactly matches its C counterpart to try and help
30 29 # prevent surprise breakage for anyone that develops against
31 30 # the pure version.
32 31 if data and data[-1] != '\n':
33 32 raise ValueError('Manifest did not end in a newline.')
34 33 prev = None
35 34 for l in data.splitlines():
36 35 if prev is not None and prev > l:
37 36 raise ValueError('Manifest lines not in sorted order.')
38 37 prev = l
39 38 f, n = l.split('\0')
40 39 if len(n) > 40:
41 40 yield f, revlog.bin(n[:40]), n[40:]
42 41 else:
43 42 yield f, revlog.bin(n), ''
44 43
45 44 def _parsev2(data):
46 45 metadataend = data.find('\n')
47 46 # Just ignore metadata for now
48 47 pos = metadataend + 1
49 48 prevf = ''
50 49 while pos < len(data):
51 50 end = data.find('\n', pos + 1) # +1 to skip stem length byte
52 51 if end == -1:
53 52 raise ValueError('Manifest ended with incomplete file entry.')
54 53 stemlen = ord(data[pos])
55 54 items = data[pos + 1:end].split('\0')
56 55 f = prevf[:stemlen] + items[0]
57 56 if prevf > f:
58 57 raise ValueError('Manifest entries not in sorted order.')
59 58 fl = items[1]
60 59 # Just ignore metadata (items[2:] for now)
61 60 n = data[end + 1:end + 21]
62 61 yield f, n, fl
63 62 pos = end + 22
64 63 prevf = f
65 64
66 65 def _parse(data):
67 66 """Generates (path, node, flags) tuples from a manifest text"""
68 67 if data.startswith('\0'):
69 68 return iter(_parsev2(data))
70 69 else:
71 70 return iter(_parsev1(data))
72 71
73 72 def _text(it, usemanifestv2):
74 73 """Given an iterator over (path, node, flags) tuples, returns a manifest
75 74 text"""
76 75 if usemanifestv2:
77 76 return _textv2(it)
78 77 else:
79 78 return _textv1(it)
80 79
81 80 def _textv1(it):
82 81 files = []
83 82 lines = []
84 83 _hex = revlog.hex
85 84 for f, n, fl in it:
86 85 files.append(f)
87 86 # if this is changed to support newlines in filenames,
88 87 # be sure to check the templates/ dir again (especially *-raw.tmpl)
89 88 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
90 89
91 90 _checkforbidden(files)
92 91 return ''.join(lines)
93 92
94 93 def _textv2(it):
95 94 files = []
96 95 lines = ['\0\n']
97 96 prevf = ''
98 97 for f, n, fl in it:
99 98 files.append(f)
100 99 stem = os.path.commonprefix([prevf, f])
101 100 stemlen = min(len(stem), 255)
102 101 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
103 102 prevf = f
104 103 _checkforbidden(files)
105 104 return ''.join(lines)
106 105
107 106 class lazymanifestiter(object):
108 107 def __init__(self, lm):
109 108 self.pos = 0
110 109 self.lm = lm
111 110
112 111 def __iter__(self):
113 112 return self
114 113
115 114 def next(self):
116 115 try:
117 116 data, pos = self.lm._get(self.pos)
118 117 except IndexError:
119 118 raise StopIteration
120 119 if pos == -1:
121 120 self.pos += 1
122 121 return data[0]
123 122 self.pos += 1
124 123 zeropos = data.find('\x00', pos)
125 124 return data[pos:zeropos]
126 125
127 126 class lazymanifestiterentries(object):
128 127 def __init__(self, lm):
129 128 self.lm = lm
130 129 self.pos = 0
131 130
132 131 def __iter__(self):
133 132 return self
134 133
135 134 def next(self):
136 135 try:
137 136 data, pos = self.lm._get(self.pos)
138 137 except IndexError:
139 138 raise StopIteration
140 139 if pos == -1:
141 140 self.pos += 1
142 141 return data
143 142 zeropos = data.find('\x00', pos)
144 143 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
145 144 zeropos + 1, 40)
146 145 flags = self.lm._getflags(data, self.pos, zeropos)
147 146 self.pos += 1
148 147 return (data[pos:zeropos], hashval, flags)
149 148
150 149 def unhexlify(data, extra, pos, length):
151 150 s = data[pos:pos + length].decode('hex')
152 151 if extra:
153 152 s += chr(extra & 0xff)
154 153 return s
155 154
156 155 def _cmp(a, b):
157 156 return (a > b) - (a < b)
158 157
159 158 class _lazymanifest(object):
160 159 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
161 160 if positions is None:
162 161 self.positions = self.findlines(data)
163 162 self.extrainfo = [0] * len(self.positions)
164 163 self.data = data
165 164 self.extradata = []
166 165 else:
167 166 self.positions = positions[:]
168 167 self.extrainfo = extrainfo[:]
169 168 self.extradata = extradata[:]
170 169 self.data = data
171 170
172 171 def findlines(self, data):
173 172 if not data:
174 173 return []
175 174 pos = data.find("\n")
176 175 if pos == -1 or data[-1] != '\n':
177 176 raise ValueError("Manifest did not end in a newline.")
178 177 positions = [0]
179 178 prev = data[:data.find('\x00')]
180 179 while pos < len(data) - 1 and pos != -1:
181 180 positions.append(pos + 1)
182 181 nexts = data[pos + 1:data.find('\x00', pos + 1)]
183 182 if nexts < prev:
184 183 raise ValueError("Manifest lines not in sorted order.")
185 184 prev = nexts
186 185 pos = data.find("\n", pos + 1)
187 186 return positions
188 187
189 188 def _get(self, index):
190 189 # get the position encoded in pos:
191 190 # positive number is an index in 'data'
192 191 # negative number is in extrapieces
193 192 pos = self.positions[index]
194 193 if pos >= 0:
195 194 return self.data, pos
196 195 return self.extradata[-pos - 1], -1
197 196
198 197 def _getkey(self, pos):
199 198 if pos >= 0:
200 199 return self.data[pos:self.data.find('\x00', pos + 1)]
201 200 return self.extradata[-pos - 1][0]
202 201
203 202 def bsearch(self, key):
204 203 first = 0
205 204 last = len(self.positions) - 1
206 205
207 206 while first <= last:
208 207 midpoint = (first + last)//2
209 208 nextpos = self.positions[midpoint]
210 209 candidate = self._getkey(nextpos)
211 210 r = _cmp(key, candidate)
212 211 if r == 0:
213 212 return midpoint
214 213 else:
215 214 if r < 0:
216 215 last = midpoint - 1
217 216 else:
218 217 first = midpoint + 1
219 218 return -1
220 219
221 220 def bsearch2(self, key):
222 221 # same as the above, but will always return the position
223 222 # done for performance reasons
224 223 first = 0
225 224 last = len(self.positions) - 1
226 225
227 226 while first <= last:
228 227 midpoint = (first + last)//2
229 228 nextpos = self.positions[midpoint]
230 229 candidate = self._getkey(nextpos)
231 230 r = _cmp(key, candidate)
232 231 if r == 0:
233 232 return (midpoint, True)
234 233 else:
235 234 if r < 0:
236 235 last = midpoint - 1
237 236 else:
238 237 first = midpoint + 1
239 238 return (first, False)
240 239
241 240 def __contains__(self, key):
242 241 return self.bsearch(key) != -1
243 242
244 243 def _getflags(self, data, needle, pos):
245 244 start = pos + 41
246 245 end = data.find("\n", start)
247 246 if end == -1:
248 247 end = len(data) - 1
249 248 if start == end:
250 249 return ''
251 250 return self.data[start:end]
252 251
253 252 def __getitem__(self, key):
254 253 if not isinstance(key, str):
255 254 raise TypeError("getitem: manifest keys must be a string.")
256 255 needle = self.bsearch(key)
257 256 if needle == -1:
258 257 raise KeyError
259 258 data, pos = self._get(needle)
260 259 if pos == -1:
261 260 return (data[1], data[2])
262 261 zeropos = data.find('\x00', pos)
263 262 assert 0 <= needle <= len(self.positions)
264 263 assert len(self.extrainfo) == len(self.positions)
265 264 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
266 265 flags = self._getflags(data, needle, zeropos)
267 266 return (hashval, flags)
268 267
269 268 def __delitem__(self, key):
270 269 needle, found = self.bsearch2(key)
271 270 if not found:
272 271 raise KeyError
273 272 cur = self.positions[needle]
274 273 self.positions = self.positions[:needle] + self.positions[needle + 1:]
275 274 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
276 275 if cur >= 0:
277 276 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
278 277
279 278 def __setitem__(self, key, value):
280 279 if not isinstance(key, str):
281 280 raise TypeError("setitem: manifest keys must be a string.")
282 281 if not isinstance(value, tuple) or len(value) != 2:
283 282 raise TypeError("Manifest values must be a tuple of (node, flags).")
284 283 hashval = value[0]
285 284 if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22:
286 285 raise TypeError("node must be a 20-byte string")
287 286 flags = value[1]
288 287 if len(hashval) == 22:
289 288 hashval = hashval[:-1]
290 289 if not isinstance(flags, str) or len(flags) > 1:
291 290 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
292 291 needle, found = self.bsearch2(key)
293 292 if found:
294 293 # put the item
295 294 pos = self.positions[needle]
296 295 if pos < 0:
297 296 self.extradata[-pos - 1] = (key, hashval, value[1])
298 297 else:
299 298 # just don't bother
300 299 self.extradata.append((key, hashval, value[1]))
301 300 self.positions[needle] = -len(self.extradata)
302 301 else:
303 302 # not found, put it in with extra positions
304 303 self.extradata.append((key, hashval, value[1]))
305 304 self.positions = (self.positions[:needle] + [-len(self.extradata)]
306 305 + self.positions[needle:])
307 306 self.extrainfo = (self.extrainfo[:needle] + [0] +
308 307 self.extrainfo[needle:])
309 308
310 309 def copy(self):
311 310 # XXX call _compact like in C?
312 311 return _lazymanifest(self.data, self.positions, self.extrainfo,
313 312 self.extradata)
314 313
315 314 def _compact(self):
316 315 # hopefully not called TOO often
317 316 if len(self.extradata) == 0:
318 317 return
319 318 l = []
320 319 last_cut = 0
321 320 i = 0
322 321 offset = 0
323 322 self.extrainfo = [0] * len(self.positions)
324 323 while i < len(self.positions):
325 324 if self.positions[i] >= 0:
326 325 cur = self.positions[i]
327 326 last_cut = cur
328 327 while True:
329 328 self.positions[i] = offset
330 329 i += 1
331 330 if i == len(self.positions) or self.positions[i] < 0:
332 331 break
333 332 offset += self.positions[i] - cur
334 333 cur = self.positions[i]
335 334 end_cut = self.data.find('\n', cur)
336 335 if end_cut != -1:
337 336 end_cut += 1
338 337 offset += end_cut - cur
339 338 l.append(self.data[last_cut:end_cut])
340 339 else:
341 340 while i < len(self.positions) and self.positions[i] < 0:
342 341 cur = self.positions[i]
343 342 t = self.extradata[-cur - 1]
344 343 l.append(self._pack(t))
345 344 self.positions[i] = offset
346 345 if len(t[1]) > 20:
347 346 self.extrainfo[i] = ord(t[1][21])
348 347 offset += len(l[-1])
349 348 i += 1
350 349 self.data = ''.join(l)
351 350 self.extradata = []
352 351
353 352 def _pack(self, d):
354 353 return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n'
355 354
356 355 def text(self):
357 356 self._compact()
358 357 return self.data
359 358
360 359 def diff(self, m2, clean=False):
361 360 '''Finds changes between the current manifest and m2.'''
362 361 # XXX think whether efficiency matters here
363 362 diff = {}
364 363
365 364 for fn, e1, flags in self.iterentries():
366 365 if fn not in m2:
367 366 diff[fn] = (e1, flags), (None, '')
368 367 else:
369 368 e2 = m2[fn]
370 369 if (e1, flags) != e2:
371 370 diff[fn] = (e1, flags), e2
372 371 elif clean:
373 372 diff[fn] = None
374 373
375 374 for fn, e2, flags in m2.iterentries():
376 375 if fn not in self:
377 376 diff[fn] = (None, ''), (e2, flags)
378 377
379 378 return diff
380 379
381 380 def iterentries(self):
382 381 return lazymanifestiterentries(self)
383 382
384 383 def iterkeys(self):
385 384 return lazymanifestiter(self)
386 385
387 386 def __iter__(self):
388 387 return lazymanifestiter(self)
389 388
390 389 def __len__(self):
391 390 return len(self.positions)
392 391
393 392 def filtercopy(self, filterfn):
394 393 # XXX should be optimized
395 394 c = _lazymanifest('')
396 395 for f, n, fl in self.iterentries():
397 396 if filterfn(f):
398 397 c[f] = n, fl
399 398 return c
400 399
401 400 try:
402 401 _lazymanifest = parsers.lazymanifest
403 402 except AttributeError:
404 403 pass
405 404
406 405 class manifestdict(object):
407 406 def __init__(self, data=''):
408 407 if data.startswith('\0'):
409 408 #_lazymanifest can not parse v2
410 409 self._lm = _lazymanifest('')
411 410 for f, n, fl in _parsev2(data):
412 411 self._lm[f] = n, fl
413 412 else:
414 413 self._lm = _lazymanifest(data)
415 414
416 415 def __getitem__(self, key):
417 416 return self._lm[key][0]
418 417
419 418 def find(self, key):
420 419 return self._lm[key]
421 420
422 421 def __len__(self):
423 422 return len(self._lm)
424 423
425 424 def __nonzero__(self):
426 425 # nonzero is covered by the __len__ function, but implementing it here
427 426 # makes it easier for extensions to override.
428 427 return len(self._lm) != 0
429 428
430 429 def __setitem__(self, key, node):
431 430 self._lm[key] = node, self.flags(key, '')
432 431
433 432 def __contains__(self, key):
434 433 return key in self._lm
435 434
436 435 def __delitem__(self, key):
437 436 del self._lm[key]
438 437
439 438 def __iter__(self):
440 439 return self._lm.__iter__()
441 440
442 441 def iterkeys(self):
443 442 return self._lm.iterkeys()
444 443
445 444 def keys(self):
446 445 return list(self.iterkeys())
447 446
448 447 def filesnotin(self, m2, match=None):
449 448 '''Set of files in this manifest that are not in the other'''
450 449 if match:
451 450 m1 = self.matches(match)
452 451 m2 = m2.matches(match)
453 452 return m1.filesnotin(m2)
454 453 diff = self.diff(m2)
455 454 files = set(filepath
456 455 for filepath, hashflags in diff.iteritems()
457 456 if hashflags[1][0] is None)
458 457 return files
459 458
460 459 @propertycache
461 460 def _dirs(self):
462 461 return util.dirs(self)
463 462
464 463 def dirs(self):
465 464 return self._dirs
466 465
467 466 def hasdir(self, dir):
468 467 return dir in self._dirs
469 468
470 469 def _filesfastpath(self, match):
471 470 '''Checks whether we can correctly and quickly iterate over matcher
472 471 files instead of over manifest files.'''
473 472 files = match.files()
474 473 return (len(files) < 100 and (match.isexact() or
475 474 (match.prefix() and all(fn in self for fn in files))))
476 475
477 476 def walk(self, match):
478 477 '''Generates matching file names.
479 478
480 479 Equivalent to manifest.matches(match).iterkeys(), but without creating
481 480 an entirely new manifest.
482 481
483 482 It also reports nonexistent files by marking them bad with match.bad().
484 483 '''
485 484 if match.always():
486 485 for f in iter(self):
487 486 yield f
488 487 return
489 488
490 489 fset = set(match.files())
491 490
492 491 # avoid the entire walk if we're only looking for specific files
493 492 if self._filesfastpath(match):
494 493 for fn in sorted(fset):
495 494 yield fn
496 495 return
497 496
498 497 for fn in self:
499 498 if fn in fset:
500 499 # specified pattern is the exact name
501 500 fset.remove(fn)
502 501 if match(fn):
503 502 yield fn
504 503
505 504 # for dirstate.walk, files=['.'] means "walk the whole tree".
506 505 # follow that here, too
507 506 fset.discard('.')
508 507
509 508 for fn in sorted(fset):
510 509 if not self.hasdir(fn):
511 510 match.bad(fn, None)
512 511
513 512 def matches(self, match):
514 513 '''generate a new manifest filtered by the match argument'''
515 514 if match.always():
516 515 return self.copy()
517 516
518 517 if self._filesfastpath(match):
519 518 m = manifestdict()
520 519 lm = self._lm
521 520 for fn in match.files():
522 521 if fn in lm:
523 522 m._lm[fn] = lm[fn]
524 523 return m
525 524
526 525 m = manifestdict()
527 526 m._lm = self._lm.filtercopy(match)
528 527 return m
529 528
530 529 def diff(self, m2, match=None, clean=False):
531 530 '''Finds changes between the current manifest and m2.
532 531
533 532 Args:
534 533 m2: the manifest to which this manifest should be compared.
535 534 clean: if true, include files unchanged between these manifests
536 535 with a None value in the returned dictionary.
537 536
538 537 The result is returned as a dict with filename as key and
539 538 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
540 539 nodeid in the current/other manifest and fl1/fl2 is the flag
541 540 in the current/other manifest. Where the file does not exist,
542 541 the nodeid will be None and the flags will be the empty
543 542 string.
544 543 '''
545 544 if match:
546 545 m1 = self.matches(match)
547 546 m2 = m2.matches(match)
548 547 return m1.diff(m2, clean=clean)
549 548 return self._lm.diff(m2._lm, clean)
550 549
551 550 def setflag(self, key, flag):
552 551 self._lm[key] = self[key], flag
553 552
554 553 def get(self, key, default=None):
555 554 try:
556 555 return self._lm[key][0]
557 556 except KeyError:
558 557 return default
559 558
560 559 def flags(self, key, default=''):
561 560 try:
562 561 return self._lm[key][1]
563 562 except KeyError:
564 563 return default
565 564
566 565 def copy(self):
567 566 c = manifestdict()
568 567 c._lm = self._lm.copy()
569 568 return c
570 569
571 570 def iteritems(self):
572 571 return (x[:2] for x in self._lm.iterentries())
573 572
574 573 def iterentries(self):
575 574 return self._lm.iterentries()
576 575
577 576 def text(self, usemanifestv2=False):
578 577 if usemanifestv2:
579 578 return _textv2(self._lm.iterentries())
580 579 else:
581 580 # use (probably) native version for v1
582 581 return self._lm.text()
583 582
584 583 def fastdelta(self, base, changes):
585 584 """Given a base manifest text as an array.array and a list of changes
586 585 relative to that text, compute a delta that can be used by revlog.
587 586 """
588 587 delta = []
589 588 dstart = None
590 589 dend = None
591 590 dline = [""]
592 591 start = 0
593 592 # zero copy representation of base as a buffer
594 593 addbuf = util.buffer(base)
595 594
596 595 changes = list(changes)
597 596 if len(changes) < 1000:
598 597 # start with a readonly loop that finds the offset of
599 598 # each line and creates the deltas
600 599 for f, todelete in changes:
601 600 # start/end will either bracket the existing entry or give the insert point
602 601 start, end = _msearch(addbuf, f, start)
603 602 if not todelete:
604 603 h, fl = self._lm[f]
605 604 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
606 605 else:
607 606 if start == end:
608 607 # item we want to delete was not found, error out
609 608 raise AssertionError(
610 609 _("failed to remove %s from manifest") % f)
611 610 l = ""
612 611 if dstart is not None and dstart <= start and dend >= start:
613 612 if dend < end:
614 613 dend = end
615 614 if l:
616 615 dline.append(l)
617 616 else:
618 617 if dstart is not None:
619 618 delta.append([dstart, dend, "".join(dline)])
620 619 dstart = start
621 620 dend = end
622 621 dline = [l]
623 622
624 623 if dstart is not None:
625 624 delta.append([dstart, dend, "".join(dline)])
626 625 # apply the delta to the base, and get a delta for addrevision
627 626 deltatext, arraytext = _addlistdelta(base, delta)
628 627 else:
629 628 # For large changes, it's much cheaper to just build the text and
630 629 # diff it.
631 arraytext = array.array('c', self.text())
632 deltatext = mdiff.textdiff(base, arraytext)
630 arraytext = bytearray(self.text())
631 deltatext = mdiff.textdiff(
632 util.buffer(base), util.buffer(arraytext))
633 633
634 634 return arraytext, deltatext
635 635
636 636 def _msearch(m, s, lo=0, hi=None):
637 637 '''return a tuple (start, end) that says where to find s within m.
638 638
639 639 If the string is found m[start:end] are the line containing
640 640 that string. If start == end the string was not found and
641 641 they indicate the proper sorted insertion point.
642 642
643 643 m should be a buffer or a string
644 644 s is a string'''
645 645 def advance(i, c):
646 646 while i < lenm and m[i] != c:
647 647 i += 1
648 648 return i
649 649 if not s:
650 650 return (lo, lo)
651 651 lenm = len(m)
652 652 if not hi:
653 653 hi = lenm
654 654 while lo < hi:
655 655 mid = (lo + hi) // 2
656 656 start = mid
657 657 while start > 0 and m[start - 1] != '\n':
658 658 start -= 1
659 659 end = advance(start, '\0')
660 660 if m[start:end] < s:
661 661 # we know that after the null there are 40 bytes of sha1
662 662 # this translates to the bisect lo = mid + 1
663 663 lo = advance(end + 40, '\n') + 1
664 664 else:
665 665 # this translates to the bisect hi = mid
666 666 hi = start
667 667 end = advance(lo, '\0')
668 668 found = m[lo:end]
669 669 if s == found:
670 670 # we know that after the null there are 40 bytes of sha1
671 671 end = advance(end + 40, '\n')
672 672 return (lo, end + 1)
673 673 else:
674 674 return (lo, lo)
675 675
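A quick sketch of _msearch (defined above) against a hand-built v1 manifest text, where each line is '<filename>\0<40 hex chars>\n' in sorted filename order; the node ids here are fake:

    text = 'bar\0' + '1' * 40 + '\n' + 'foo\0' + '2' * 40 + '\n'
    start, end = _msearch(text, 'foo')
    # (start, end) bracket the whole matching line; on a miss,
    # start == end at the sorted insertion point.
    assert text[start:end] == 'foo\0' + '2' * 40 + '\n'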
676 676 def _checkforbidden(l):
677 677 """Check filenames for illegal characters."""
678 678 for f in l:
679 679 if '\n' in f or '\r' in f:
680 680 raise error.RevlogError(
681 681 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
682 682
683 683
684 684 # apply the changes collected during the bisect loop to our addlist
685 685 # return a delta suitable for addrevision
686 686 def _addlistdelta(addlist, x):
687 687 # for large addlist arrays, building a new array is cheaper
688 688 # than repeatedly modifying the existing one
689 689 currentposition = 0
690 newaddlist = array.array('c')
690 newaddlist = bytearray()
691 691
692 692 for start, end, content in x:
693 693 newaddlist += addlist[currentposition:start]
694 694 if content:
695 newaddlist += array.array('c', content)
695 newaddlist += bytearray(content)
696 696
697 697 currentposition = end
698 698
699 699 newaddlist += addlist[currentposition:]
700 700
701 701 deltatext = "".join(struct.pack(">lll", start, end, len(content))
702 702 + content for start, end, content in x)
703 703 return deltatext, newaddlist
704 704
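The deltatext built above is revlog's binary patch format: each change is a big-endian (start, end, len(content)) header followed by the content itself. A self-contained round-trip of one record, assuming the layout implied by the struct.pack call above:

    import struct

    content = 'foo\0' + 'f' * 40 + '\n'            # one manifest line
    record = struct.pack('>lll', 0, 0, len(content)) + content
    start, end, length = struct.unpack('>lll', record[:12])
    assert (start, end) == (0, 0)                  # insert at offset 0
    assert record[12:12 + length] == content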
705 705 def _splittopdir(f):
706 706 if '/' in f:
707 707 dir, subpath = f.split('/', 1)
708 708 return dir + '/', subpath
709 709 else:
710 710 return '', f
711 711
712 712 _noop = lambda s: None
713 713
714 714 class treemanifest(object):
715 715 def __init__(self, dir='', text=''):
716 716 self._dir = dir
717 717 self._node = revlog.nullid
718 718 self._loadfunc = _noop
719 719 self._copyfunc = _noop
720 720 self._dirty = False
721 721 self._dirs = {}
722 722 # Using _lazymanifest here is a little slower than plain old dicts
723 723 self._files = {}
724 724 self._flags = {}
725 725 if text:
726 726 def readsubtree(subdir, subm):
727 727 raise AssertionError('treemanifest constructor only accepts '
728 728 'flat manifests')
729 729 self.parse(text, readsubtree)
730 730 self._dirty = True # Mark flat manifest dirty after parsing
731 731
732 732 def _subpath(self, path):
733 733 return self._dir + path
734 734
735 735 def __len__(self):
736 736 self._load()
737 737 size = len(self._files)
738 738 for m in self._dirs.values():
739 739 size += m.__len__()
740 740 return size
741 741
742 742 def _isempty(self):
743 743 self._load() # for consistency; already loaded by all callers
744 744 return (not self._files and (not self._dirs or
745 745 all(m._isempty() for m in self._dirs.values())))
746 746
747 747 def __repr__(self):
748 748 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
749 749 (self._dir, revlog.hex(self._node),
750 750 bool(self._loadfunc is _noop),
751 751 self._dirty, id(self)))
752 752
753 753 def dir(self):
754 754 '''The directory that this tree manifest represents, including a
755 755 trailing '/'. Empty string for the repo root directory.'''
756 756 return self._dir
757 757
758 758 def node(self):
759 759 '''The node of this instance. nullid for unsaved instances. Should
760 760 be updated when the instance is read from or written to a revlog.
761 761 '''
762 762 assert not self._dirty
763 763 return self._node
764 764
765 765 def setnode(self, node):
766 766 self._node = node
767 767 self._dirty = False
768 768
769 769 def iterentries(self):
770 770 self._load()
771 771 for p, n in sorted(self._dirs.items() + self._files.items()):
772 772 if p in self._files:
773 773 yield self._subpath(p), n, self._flags.get(p, '')
774 774 else:
775 775 for x in n.iterentries():
776 776 yield x
777 777
778 778 def iteritems(self):
779 779 self._load()
780 780 for p, n in sorted(self._dirs.items() + self._files.items()):
781 781 if p in self._files:
782 782 yield self._subpath(p), n
783 783 else:
784 784 for f, sn in n.iteritems():
785 785 yield f, sn
786 786
787 787 def iterkeys(self):
788 788 self._load()
789 789 for p in sorted(self._dirs.keys() + self._files.keys()):
790 790 if p in self._files:
791 791 yield self._subpath(p)
792 792 else:
793 793 for f in self._dirs[p].iterkeys():
794 794 yield f
795 795
796 796 def keys(self):
797 797 return list(self.iterkeys())
798 798
799 799 def __iter__(self):
800 800 return self.iterkeys()
801 801
802 802 def __contains__(self, f):
803 803 if f is None:
804 804 return False
805 805 self._load()
806 806 dir, subpath = _splittopdir(f)
807 807 if dir:
808 808 if dir not in self._dirs:
809 809 return False
810 810 return self._dirs[dir].__contains__(subpath)
811 811 else:
812 812 return f in self._files
813 813
814 814 def get(self, f, default=None):
815 815 self._load()
816 816 dir, subpath = _splittopdir(f)
817 817 if dir:
818 818 if dir not in self._dirs:
819 819 return default
820 820 return self._dirs[dir].get(subpath, default)
821 821 else:
822 822 return self._files.get(f, default)
823 823
824 824 def __getitem__(self, f):
825 825 self._load()
826 826 dir, subpath = _splittopdir(f)
827 827 if dir:
828 828 return self._dirs[dir].__getitem__(subpath)
829 829 else:
830 830 return self._files[f]
831 831
832 832 def flags(self, f):
833 833 self._load()
834 834 dir, subpath = _splittopdir(f)
835 835 if dir:
836 836 if dir not in self._dirs:
837 837 return ''
838 838 return self._dirs[dir].flags(subpath)
839 839 else:
840 840 if f in self._dirs:
841 841 return ''
842 842 return self._flags.get(f, '')
843 843
844 844 def find(self, f):
845 845 self._load()
846 846 dir, subpath = _splittopdir(f)
847 847 if dir:
848 848 return self._dirs[dir].find(subpath)
849 849 else:
850 850 return self._files[f], self._flags.get(f, '')
851 851
852 852 def __delitem__(self, f):
853 853 self._load()
854 854 dir, subpath = _splittopdir(f)
855 855 if dir:
856 856 self._dirs[dir].__delitem__(subpath)
857 857 # If the directory is now empty, remove it
858 858 if self._dirs[dir]._isempty():
859 859 del self._dirs[dir]
860 860 else:
861 861 del self._files[f]
862 862 if f in self._flags:
863 863 del self._flags[f]
864 864 self._dirty = True
865 865
866 866 def __setitem__(self, f, n):
867 867 assert n is not None
868 868 self._load()
869 869 dir, subpath = _splittopdir(f)
870 870 if dir:
871 871 if dir not in self._dirs:
872 872 self._dirs[dir] = treemanifest(self._subpath(dir))
873 873 self._dirs[dir].__setitem__(subpath, n)
874 874 else:
875 875 self._files[f] = n[:21] # to match manifestdict's behavior
876 876 self._dirty = True
877 877
878 878 def _load(self):
879 879 if self._loadfunc is not _noop:
880 880 lf, self._loadfunc = self._loadfunc, _noop
881 881 lf(self)
882 882 elif self._copyfunc is not _noop:
883 883 cf, self._copyfunc = self._copyfunc, _noop
884 884 cf(self)
885 885
886 886 def setflag(self, f, flags):
887 887 """Set the flags (symlink, executable) for path f."""
888 888 self._load()
889 889 dir, subpath = _splittopdir(f)
890 890 if dir:
891 891 if dir not in self._dirs:
892 892 self._dirs[dir] = treemanifest(self._subpath(dir))
893 893 self._dirs[dir].setflag(subpath, flags)
894 894 else:
895 895 self._flags[f] = flags
896 896 self._dirty = True
897 897
898 898 def copy(self):
899 899 copy = treemanifest(self._dir)
900 900 copy._node = self._node
901 901 copy._dirty = self._dirty
902 902 if self._copyfunc is _noop:
903 903 def _copyfunc(s):
904 904 self._load()
905 905 for d in self._dirs:
906 906 s._dirs[d] = self._dirs[d].copy()
907 907 s._files = dict.copy(self._files)
908 908 s._flags = dict.copy(self._flags)
909 909 if self._loadfunc is _noop:
910 910 _copyfunc(copy)
911 911 else:
912 912 copy._copyfunc = _copyfunc
913 913 else:
914 914 copy._copyfunc = self._copyfunc
915 915 return copy
916 916
917 917 def filesnotin(self, m2, match=None):
918 918 '''Set of files in this manifest that are not in the other'''
919 919 if match:
920 920 m1 = self.matches(match)
921 921 m2 = m2.matches(match)
922 922 return m1.filesnotin(m2)
923 923
924 924 files = set()
925 925 def _filesnotin(t1, t2):
926 926 if t1._node == t2._node and not t1._dirty and not t2._dirty:
927 927 return
928 928 t1._load()
929 929 t2._load()
930 930 for d, m1 in t1._dirs.iteritems():
931 931 if d in t2._dirs:
932 932 m2 = t2._dirs[d]
933 933 _filesnotin(m1, m2)
934 934 else:
935 935 files.update(m1.iterkeys())
936 936
937 937 for fn in t1._files.iterkeys():
938 938 if fn not in t2._files:
939 939 files.add(t1._subpath(fn))
940 940
941 941 _filesnotin(self, m2)
942 942 return files
943 943
944 944 @propertycache
945 945 def _alldirs(self):
946 946 return util.dirs(self)
947 947
948 948 def dirs(self):
949 949 return self._alldirs
950 950
951 951 def hasdir(self, dir):
952 952 self._load()
953 953 topdir, subdir = _splittopdir(dir)
954 954 if topdir:
955 955 if topdir in self._dirs:
956 956 return self._dirs[topdir].hasdir(subdir)
957 957 return False
958 958 return (dir + '/') in self._dirs
959 959
960 960 def walk(self, match):
961 961 '''Generates matching file names.
962 962
963 963 Equivalent to manifest.matches(match).iterkeys(), but without creating
964 964 an entirely new manifest.
965 965
966 966 It also reports nonexistent files by marking them bad with match.bad().
967 967 '''
968 968 if match.always():
969 969 for f in iter(self):
970 970 yield f
971 971 return
972 972
973 973 fset = set(match.files())
974 974
975 975 for fn in self._walk(match):
976 976 if fn in fset:
977 977 # specified pattern is the exact name
978 978 fset.remove(fn)
979 979 yield fn
980 980
981 981 # for dirstate.walk, files=['.'] means "walk the whole tree".
982 982 # follow that here, too
983 983 fset.discard('.')
984 984
985 985 for fn in sorted(fset):
986 986 if not self.hasdir(fn):
987 987 match.bad(fn, None)
988 988
989 989 def _walk(self, match):
990 990 '''Recursively generates matching file names for walk().'''
991 991 if not match.visitdir(self._dir[:-1] or '.'):
992 992 return
993 993
994 994 # yield this dir's files and walk its submanifests
995 995 self._load()
996 996 for p in sorted(self._dirs.keys() + self._files.keys()):
997 997 if p in self._files:
998 998 fullp = self._subpath(p)
999 999 if match(fullp):
1000 1000 yield fullp
1001 1001 else:
1002 1002 for f in self._dirs[p]._walk(match):
1003 1003 yield f
1004 1004
1005 1005 def matches(self, match):
1006 1006 '''generate a new manifest filtered by the match argument'''
1007 1007 if match.always():
1008 1008 return self.copy()
1009 1009
1010 1010 return self._matches(match)
1011 1011
1012 1012 def _matches(self, match):
1013 1013 '''recursively generate a new manifest filtered by the match argument.
1014 1014 '''
1015 1015
1016 1016 visit = match.visitdir(self._dir[:-1] or '.')
1017 1017 if visit == 'all':
1018 1018 return self.copy()
1019 1019 ret = treemanifest(self._dir)
1020 1020 if not visit:
1021 1021 return ret
1022 1022
1023 1023 self._load()
1024 1024 for fn in self._files:
1025 1025 fullp = self._subpath(fn)
1026 1026 if not match(fullp):
1027 1027 continue
1028 1028 ret._files[fn] = self._files[fn]
1029 1029 if fn in self._flags:
1030 1030 ret._flags[fn] = self._flags[fn]
1031 1031
1032 1032 for dir, subm in self._dirs.iteritems():
1033 1033 m = subm._matches(match)
1034 1034 if not m._isempty():
1035 1035 ret._dirs[dir] = m
1036 1036
1037 1037 if not ret._isempty():
1038 1038 ret._dirty = True
1039 1039 return ret
1040 1040
1041 1041 def diff(self, m2, match=None, clean=False):
1042 1042 '''Finds changes between the current manifest and m2.
1043 1043
1044 1044 Args:
1045 1045 m2: the manifest to which this manifest should be compared.
1046 1046 clean: if true, include files unchanged between these manifests
1047 1047 with a None value in the returned dictionary.
1048 1048
1049 1049 The result is returned as a dict with filename as key and
1050 1050 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1051 1051 nodeid in the current/other manifest and fl1/fl2 is the flag
1052 1052 in the current/other manifest. Where the file does not exist,
1053 1053 the nodeid will be None and the flags will be the empty
1054 1054 string.
1055 1055 '''
1056 1056 if match:
1057 1057 m1 = self.matches(match)
1058 1058 m2 = m2.matches(match)
1059 1059 return m1.diff(m2, clean=clean)
1060 1060 result = {}
1061 1061 emptytree = treemanifest()
1062 1062 def _diff(t1, t2):
1063 1063 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1064 1064 return
1065 1065 t1._load()
1066 1066 t2._load()
1067 1067 for d, m1 in t1._dirs.iteritems():
1068 1068 m2 = t2._dirs.get(d, emptytree)
1069 1069 _diff(m1, m2)
1070 1070
1071 1071 for d, m2 in t2._dirs.iteritems():
1072 1072 if d not in t1._dirs:
1073 1073 _diff(emptytree, m2)
1074 1074
1075 1075 for fn, n1 in t1._files.iteritems():
1076 1076 fl1 = t1._flags.get(fn, '')
1077 1077 n2 = t2._files.get(fn, None)
1078 1078 fl2 = t2._flags.get(fn, '')
1079 1079 if n1 != n2 or fl1 != fl2:
1080 1080 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1081 1081 elif clean:
1082 1082 result[t1._subpath(fn)] = None
1083 1083
1084 1084 for fn, n2 in t2._files.iteritems():
1085 1085 if fn not in t1._files:
1086 1086 fl2 = t2._flags.get(fn, '')
1087 1087 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
1088 1088
1089 1089 _diff(self, m2)
1090 1090 return result
1091 1091
1092 1092 def unmodifiedsince(self, m2):
1093 1093 return not self._dirty and not m2._dirty and self._node == m2._node
1094 1094
1095 1095 def parse(self, text, readsubtree):
1096 1096 for f, n, fl in _parse(text):
1097 1097 if fl == 't':
1098 1098 f = f + '/'
1099 1099 self._dirs[f] = readsubtree(self._subpath(f), n)
1100 1100 elif '/' in f:
1101 1101 # This is a flat manifest, so use __setitem__ and setflag rather
1102 1102 # than assigning directly to _files and _flags, so we can
1103 1103 # assign a path in a subdirectory, and to mark dirty (compared
1104 1104 # to nullid).
1105 1105 self[f] = n
1106 1106 if fl:
1107 1107 self.setflag(f, fl)
1108 1108 else:
1109 1109 # Assigning to _files and _flags avoids marking as dirty,
1110 1110 # and should be a little faster.
1111 1111 self._files[f] = n
1112 1112 if fl:
1113 1113 self._flags[f] = fl
1114 1114
1115 1115 def text(self, usemanifestv2=False):
1116 1116 """Get the full data of this manifest as a bytestring."""
1117 1117 self._load()
1118 1118 return _text(self.iterentries(), usemanifestv2)
1119 1119
1120 1120 def dirtext(self, usemanifestv2=False):
1121 1121 """Get the full data of this directory as a bytestring. Make sure that
1122 1122 any submanifests have been written first, so their nodeids are correct.
1123 1123 """
1124 1124 self._load()
1125 1125 flags = self.flags
1126 1126 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1127 1127 files = [(f, self._files[f], flags(f)) for f in self._files]
1128 1128 return _text(sorted(dirs + files), usemanifestv2)
1129 1129
1130 1130 def read(self, gettext, readsubtree):
1131 1131 def _load_for_read(s):
1132 1132 s.parse(gettext(), readsubtree)
1133 1133 s._dirty = False
1134 1134 self._loadfunc = _load_for_read
1135 1135
1136 1136 def writesubtrees(self, m1, m2, writesubtree):
1137 1137 self._load() # for consistency; should never have any effect here
1138 1138 m1._load()
1139 1139 m2._load()
1140 1140 emptytree = treemanifest()
1141 1141 for d, subm in self._dirs.iteritems():
1142 1142 subp1 = m1._dirs.get(d, emptytree)._node
1143 1143 subp2 = m2._dirs.get(d, emptytree)._node
1144 1144 if subp1 == revlog.nullid:
1145 1145 subp1, subp2 = subp2, subp1
1146 1146 writesubtree(subm, subp1, subp2)
1147 1147
1148 1148 class manifestrevlog(revlog.revlog):
1149 1149 '''A revlog that stores manifest texts. This is responsible for caching the
1150 1150 full-text manifest contents.
1151 1151 '''
1152 1152 def __init__(self, opener, dir='', dirlogcache=None, indexfile=None):
1153 1153 """Constructs a new manifest revlog
1154 1154
1155 1155 `indexfile` - used by extensions to have two manifests at once, like
1156 1156 when transitioning between flat manifests and treemanifests.
1157 1157 """
1158 1158 # During normal operations, we expect to deal with not more than four
1159 1159 # revs at a time (such as during commit --amend). When rebasing large
1160 1160 # stacks of commits, the number can go up, hence the config knob below.
1161 1161 cachesize = 4
1162 1162 usetreemanifest = False
1163 1163 usemanifestv2 = False
1164 1164 opts = getattr(opener, 'options', None)
1165 1165 if opts is not None:
1166 1166 cachesize = opts.get('manifestcachesize', cachesize)
1167 1167 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1168 1168 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
1169 1169
1170 1170 self._treeondisk = usetreemanifest
1171 1171 self._usemanifestv2 = usemanifestv2
1172 1172
1173 1173 self._fulltextcache = util.lrucachedict(cachesize)
1174 1174
1175 1175 if dir:
1176 1176 assert self._treeondisk, 'opts is %r' % opts
1177 1177 if not dir.endswith('/'):
1178 1178 dir = dir + '/'
1179 1179
1180 1180 if indexfile is None:
1181 1181 indexfile = '00manifest.i'
1182 1182 if dir:
1183 1183 indexfile = "meta/" + dir + indexfile
1184 1184
1185 1185 self._dir = dir
1186 1186 # The dirlogcache is kept on the root manifest log
1187 1187 if dir:
1188 1188 self._dirlogcache = dirlogcache
1189 1189 else:
1190 1190 self._dirlogcache = {'': self}
1191 1191
1192 1192 super(manifestrevlog, self).__init__(opener, indexfile,
1193 1193 checkambig=bool(dir))
1194 1194
1195 1195 @property
1196 1196 def fulltextcache(self):
1197 1197 return self._fulltextcache
1198 1198
1199 1199 def clearcaches(self):
1200 1200 super(manifestrevlog, self).clearcaches()
1201 1201 self._fulltextcache.clear()
1202 1202 self._dirlogcache = {'': self}
1203 1203
1204 1204 def dirlog(self, dir):
1205 1205 if dir:
1206 1206 assert self._treeondisk
1207 1207 if dir not in self._dirlogcache:
1208 1208 self._dirlogcache[dir] = manifestrevlog(self.opener, dir,
1209 1209 self._dirlogcache)
1210 1210 return self._dirlogcache[dir]
1211 1211
1212 1212 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
1213 1213 if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta')
1214 1214 and not self._usemanifestv2):
1215 1215 # If our first parent is in the manifest cache, we can
1216 1216 # compute a delta here using properties we know about the
1217 1217 # manifest up-front, which may save time later for the
1218 1218 # revlog layer.
1219 1219
1220 1220 _checkforbidden(added)
1221 1221 # combine the changed lists into one sorted iterator
1222 1222 work = heapq.merge([(x, False) for x in added],
1223 1223 [(x, True) for x in removed])
1224 1224
1225 1225 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1226 1226 cachedelta = self.rev(p1), deltatext
1227 1227 text = util.buffer(arraytext)
1228 1228 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
1229 1229 else:
1230 1230 # The first parent manifest isn't already loaded, so we'll
1231 1231 # just encode a fulltext of the manifest and pass that
1232 1232 # through to the revlog layer, and let it handle the delta
1233 1233 # process.
1234 1234 if self._treeondisk:
1235 1235 assert readtree, "readtree must be set for treemanifest writes"
1236 1236 m1 = readtree(self._dir, p1)
1237 1237 m2 = readtree(self._dir, p2)
1238 1238 n = self._addtree(m, transaction, link, m1, m2, readtree)
1239 1239 arraytext = None
1240 1240 else:
1241 1241 text = m.text(self._usemanifestv2)
1242 1242 n = self.addrevision(text, transaction, link, p1, p2)
1243 arraytext = array.array('c', text)
1243 arraytext = bytearray(text)
1244 1244
1245 1245 if arraytext is not None:
1246 1246 self.fulltextcache[n] = arraytext
1247 1247
1248 1248 return n
1249 1249
1250 1250 def _addtree(self, m, transaction, link, m1, m2, readtree):
1251 1251 # If the manifest is unchanged compared to one parent,
1252 1252 # don't write a new revision
1253 1253 if self._dir != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(m2)):
1254 1254 return m.node()
1255 1255 def writesubtree(subm, subp1, subp2):
1256 1256 sublog = self.dirlog(subm.dir())
1257 1257 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1258 1258 readtree=readtree)
1259 1259 m.writesubtrees(m1, m2, writesubtree)
1260 1260 text = m.dirtext(self._usemanifestv2)
1261 1261 n = None
1262 1262 if self._dir != '':
1263 1263 # Double-check whether contents are unchanged compared to one parent
1264 1264 if text == m1.dirtext(self._usemanifestv2):
1265 1265 n = m1.node()
1266 1266 elif text == m2.dirtext(self._usemanifestv2):
1267 1267 n = m2.node()
1268 1268
1269 1269 if not n:
1270 1270 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1271 1271
1272 1272 # Save nodeid so parent manifest can calculate its nodeid
1273 1273 m.setnode(n)
1274 1274 return n
1275 1275
1276 1276 class manifestlog(object):
1277 1277 """A collection class representing the collection of manifest snapshots
1278 1278 referenced by commits in the repository.
1279 1279
1280 1280 In this situation, 'manifest' refers to the abstract concept of a snapshot
1281 1281 of the list of files in the given commit. Consumers of the output of this
1282 1282 class do not care about the implementation details of the actual manifests
1283 1283 they receive (i.e. tree or flat or lazily loaded, etc)."""
1284 1284 def __init__(self, opener, repo):
1285 1285 usetreemanifest = False
1286 1286 cachesize = 4
1287 1287
1288 1288 opts = getattr(opener, 'options', None)
1289 1289 if opts is not None:
1290 1290 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1291 1291 cachesize = opts.get('manifestcachesize', cachesize)
1292 1292 self._treeinmem = usetreemanifest
1293 1293
1294 1294 self._oldmanifest = repo._constructmanifest()
1295 1295 self._revlog = self._oldmanifest
1296 1296
1297 1297 # A cache of the manifestctx or treemanifestctx for each directory
1298 1298 self._dirmancache = {}
1299 1299 self._dirmancache[''] = util.lrucachedict(cachesize)
1300 1300
1301 1301 self.cachesize = cachesize
1302 1302
1303 1303 def __getitem__(self, node):
1304 1304 """Retrieves the manifest instance for the given node. Throws a
1305 1305 LookupError if not found.
1306 1306 """
1307 1307 return self.get('', node)
1308 1308
1309 1309 def get(self, dir, node, verify=True):
1310 1310 """Retrieves the manifest instance for the given node. Throws a
1311 1311 LookupError if not found.
1312 1312
1313 1313 `verify` - if True an exception will be thrown if the node is not in
1314 1314 the revlog
1315 1315 """
1316 1316 if node in self._dirmancache.get(dir, ()):
1317 1317 cachemf = self._dirmancache[dir][node]
1318 1318 # The old manifest may put non-ctx manifests in the cache, so
1319 1319 # skip those since they don't implement the full api.
1320 1320 if (isinstance(cachemf, manifestctx) or
1321 1321 isinstance(cachemf, treemanifestctx)):
1322 1322 return cachemf
1323 1323
1324 1324 if dir:
1325 1325 if self._revlog._treeondisk:
1326 1326 if verify:
1327 1327 dirlog = self._revlog.dirlog(dir)
1328 1328 if node not in dirlog.nodemap:
1329 1329 raise LookupError(node, dirlog.indexfile,
1330 1330 _('no node'))
1331 1331 m = treemanifestctx(self, dir, node)
1332 1332 else:
1333 1333 raise error.Abort(
1334 1334 _("cannot ask for manifest directory '%s' in a flat "
1335 1335 "manifest") % dir)
1336 1336 else:
1337 1337 if verify:
1338 1338 if node not in self._revlog.nodemap:
1339 1339 raise LookupError(node, self._revlog.indexfile,
1340 1340 _('no node'))
1341 1341 if self._treeinmem:
1342 1342 m = treemanifestctx(self, '', node)
1343 1343 else:
1344 1344 m = manifestctx(self, node)
1345 1345
1346 1346 if node != revlog.nullid:
1347 1347 mancache = self._dirmancache.get(dir)
1348 1348 if not mancache:
1349 1349 mancache = util.lrucachedict(self.cachesize)
1350 1350 self._dirmancache[dir] = mancache
1351 1351 mancache[node] = m
1352 1352 return m
1353 1353
1354 1354 def clearcaches(self):
1355 1355 self._dirmancache.clear()
1356 1356 self._revlog.clearcaches()
1357 1357
1358 1358 class memmanifestctx(object):
1359 1359 def __init__(self, manifestlog):
1360 1360 self._manifestlog = manifestlog
1361 1361 self._manifestdict = manifestdict()
1362 1362
1363 1363 def _revlog(self):
1364 1364 return self._manifestlog._revlog
1365 1365
1366 1366 def new(self):
1367 1367 return memmanifestctx(self._manifestlog)
1368 1368
1369 1369 def copy(self):
1370 1370 memmf = memmanifestctx(self._manifestlog)
1371 1371 memmf._manifestdict = self.read().copy()
1372 1372 return memmf
1373 1373
1374 1374 def read(self):
1375 1375 return self._manifestdict
1376 1376
1377 1377 def write(self, transaction, link, p1, p2, added, removed):
1378 1378 return self._revlog().add(self._manifestdict, transaction, link, p1, p2,
1379 1379 added, removed)
1380 1380
1381 1381 class manifestctx(object):
1382 1382 """A class representing a single revision of a manifest, including its
1383 1383 contents, its parent revs, and its linkrev.
1384 1384 """
1385 1385 def __init__(self, manifestlog, node):
1386 1386 self._manifestlog = manifestlog
1387 1387 self._data = None
1388 1388
1389 1389 self._node = node
1390 1390
1391 1391 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1392 1392 # but let's add it later when something needs it and we can load it
1393 1393 # lazily.
1394 1394 #self.p1, self.p2 = revlog.parents(node)
1395 1395 #rev = revlog.rev(node)
1396 1396 #self.linkrev = revlog.linkrev(rev)
1397 1397
1398 1398 def _revlog(self):
1399 1399 return self._manifestlog._revlog
1400 1400
1401 1401 def node(self):
1402 1402 return self._node
1403 1403
1404 1404 def new(self):
1405 1405 return memmanifestctx(self._manifestlog)
1406 1406
1407 1407 def copy(self):
1408 1408 memmf = memmanifestctx(self._manifestlog)
1409 1409 memmf._manifestdict = self.read().copy()
1410 1410 return memmf
1411 1411
1412 1412 @propertycache
1413 1413 def parents(self):
1414 1414 return self._revlog().parents(self._node)
1415 1415
1416 1416 def read(self):
1417 1417 if self._data is None:
1418 1418 if self._node == revlog.nullid:
1419 1419 self._data = manifestdict()
1420 1420 else:
1421 1421 rl = self._revlog()
1422 1422 text = rl.revision(self._node)
1423 arraytext = array.array('c', text)
1423 arraytext = bytearray(text)
1424 1424 rl._fulltextcache[self._node] = arraytext
1425 1425 self._data = manifestdict(text)
1426 1426 return self._data
1427 1427
1428 1428 def readfast(self, shallow=False):
1429 1429 '''Calls either readdelta or read, based on which would be less work.
1430 1430 readdelta is called if the delta is against the p1, and therefore can be
1431 1431 read quickly.
1432 1432
1433 1433 If `shallow` is True, nothing changes since this is a flat manifest.
1434 1434 '''
1435 1435 rl = self._revlog()
1436 1436 r = rl.rev(self._node)
1437 1437 deltaparent = rl.deltaparent(r)
1438 1438 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1439 1439 return self.readdelta()
1440 1440 return self.read()
1441 1441
1442 1442 def readdelta(self, shallow=False):
1443 1443 '''Returns a manifest containing just the entries that are present
1444 1444 in this manifest, but not in its p1 manifest. This is efficient to read
1445 1445 if the revlog delta is already p1.
1446 1446
1447 1447 Changing the value of `shallow` has no effect on flat manifests.
1448 1448 '''
1449 1449 revlog = self._revlog()
1450 1450 if revlog._usemanifestv2:
1451 1451 # Need to perform a slow delta
1452 1452 r0 = revlog.deltaparent(revlog.rev(self._node))
1453 1453 m0 = self._manifestlog[revlog.node(r0)].read()
1454 1454 m1 = self.read()
1455 1455 md = manifestdict()
1456 1456 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1457 1457 if n1:
1458 1458 md[f] = n1
1459 1459 if fl1:
1460 1460 md.setflag(f, fl1)
1461 1461 return md
1462 1462
1463 1463 r = revlog.rev(self._node)
1464 1464 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1465 1465 return manifestdict(d)
1466 1466
1467 1467 def find(self, key):
1468 1468 return self.read().find(key)
1469 1469
1470 1470 class memtreemanifestctx(object):
1471 1471 def __init__(self, manifestlog, dir=''):
1472 1472 self._manifestlog = manifestlog
1473 1473 self._dir = dir
1474 1474 self._treemanifest = treemanifest()
1475 1475
1476 1476 def _revlog(self):
1477 1477 return self._manifestlog._revlog
1478 1478
1479 1479 def new(self, dir=''):
1480 1480 return memtreemanifestctx(self._manifestlog, dir=dir)
1481 1481
1482 1482 def copy(self):
1483 1483 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1484 1484 memmf._treemanifest = self._treemanifest.copy()
1485 1485 return memmf
1486 1486
1487 1487 def read(self):
1488 1488 return self._treemanifest
1489 1489
1490 1490 def write(self, transaction, link, p1, p2, added, removed):
1491 1491 def readtree(dir, node):
1492 1492 return self._manifestlog.get(dir, node).read()
1493 1493 return self._revlog().add(self._treemanifest, transaction, link, p1, p2,
1494 1494 added, removed, readtree=readtree)
1495 1495
1496 1496 class treemanifestctx(object):
1497 1497 def __init__(self, manifestlog, dir, node):
1498 1498 self._manifestlog = manifestlog
1499 1499 self._dir = dir
1500 1500 self._data = None
1501 1501
1502 1502 self._node = node
1503 1503
1504 1504 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1505 1505 # we can instantiate treemanifestctx objects for directories we don't
1506 1506 # have on disk.
1507 1507 #self.p1, self.p2 = revlog.parents(node)
1508 1508 #rev = revlog.rev(node)
1509 1509 #self.linkrev = revlog.linkrev(rev)
1510 1510
1511 1511 def _revlog(self):
1512 1512 return self._manifestlog._revlog.dirlog(self._dir)
1513 1513
1514 1514 def read(self):
1515 1515 if self._data is None:
1516 1516 rl = self._revlog()
1517 1517 if self._node == revlog.nullid:
1518 1518 self._data = treemanifest()
1519 1519 elif rl._treeondisk:
1520 1520 m = treemanifest(dir=self._dir)
1521 1521 def gettext():
1522 1522 return rl.revision(self._node)
1523 1523 def readsubtree(dir, subm):
1524 1524 # Set verify to False since we need to be able to create
1525 1525 # subtrees for trees that don't exist on disk.
1526 1526 return self._manifestlog.get(dir, subm, verify=False).read()
1527 1527 m.read(gettext, readsubtree)
1528 1528 m.setnode(self._node)
1529 1529 self._data = m
1530 1530 else:
1531 1531 text = rl.revision(self._node)
1532 arraytext = array.array('c', text)
1532 arraytext = bytearray(text)
1533 1533 rl.fulltextcache[self._node] = arraytext
1534 1534 self._data = treemanifest(dir=self._dir, text=text)
1535 1535
1536 1536 return self._data
1537 1537
1538 1538 def node(self):
1539 1539 return self._node
1540 1540
1541 1541 def new(self, dir=''):
1542 1542 return memtreemanifestctx(self._manifestlog, dir=dir)
1543 1543
1544 1544 def copy(self):
1545 1545 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1546 1546 memmf._treemanifest = self.read().copy()
1547 1547 return memmf
1548 1548
1549 1549 @propertycache
1550 1550 def parents(self):
1551 1551 return self._revlog().parents(self._node)
1552 1552
1553 1553 def readdelta(self, shallow=False):
1554 1554 '''Returns a manifest containing just the entries that are present
1555 1555 in this manifest, but not in its p1 manifest. This is efficient to read
1556 1556 if the revlog delta is already p1.
1557 1557
1558 1558 If `shallow` is True, this will read the delta for this directory,
1559 1559 without recursively reading subdirectory manifests. Instead, any
1560 1560 subdirectory entry will be reported as it appears in the manifest, i.e.
1561 1561 the subdirectory will be reported among files and distinguished only by
1562 1562 its 't' flag.
1563 1563 '''
1564 1564 revlog = self._revlog()
1565 1565 if shallow and not revlog._usemanifestv2:
1566 1566 r = revlog.rev(self._node)
1567 1567 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1568 1568 return manifestdict(d)
1569 1569 else:
1570 1570 # Need to perform a slow delta
1571 1571 r0 = revlog.deltaparent(revlog.rev(self._node))
1572 1572 m0 = self._manifestlog.get(self._dir, revlog.node(r0)).read()
1573 1573 m1 = self.read()
1574 1574 md = treemanifest(dir=self._dir)
1575 1575 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1576 1576 if n1:
1577 1577 md[f] = n1
1578 1578 if fl1:
1579 1579 md.setflag(f, fl1)
1580 1580 return md
1581 1581
1582 1582 def readfast(self, shallow=False):
1583 1583 '''Calls either readdelta or read, based on which would be less work.
1584 1584 readdelta is called if the delta is against the p1, and therefore can be
1585 1585 read quickly.
1586 1586
1587 1587 If `shallow` is True, it only returns the entries from this manifest,
1588 1588 and not any submanifests.
1589 1589 '''
1590 1590 rl = self._revlog()
1591 1591 r = rl.rev(self._node)
1592 1592 deltaparent = rl.deltaparent(r)
1593 1593 if (deltaparent != revlog.nullrev and
1594 1594 deltaparent in rl.parentrevs(r)):
1595 1595 return self.readdelta(shallow=shallow)
1596 1596
1597 1597 if shallow:
1598 1598 return manifestdict(rl.revision(self._node))
1599 1599 else:
1600 1600 return self.read()
1601 1601
1602 1602 def find(self, key):
1603 1603 return self.read().find(key)
@@ -1,569 +1,568 b''
1 1 # tags.py - read tag info from local repository
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 # Currently this module only deals with reading and caching tags.
10 10 # Eventually, it could take care of updating (adding/removing/moving)
11 11 # tags too.
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import array
16 16 import errno
17 17
18 18 from .node import (
19 19 bin,
20 20 hex,
21 21 nullid,
22 22 short,
23 23 )
24 24 from . import (
25 25 encoding,
26 26 error,
27 27 scmutil,
28 28 util,
29 29 )
30 30
31 31 array = array.array
32 32
33 33 # Tags computation can be expensive and caches exist to make it fast in
34 34 # the common case.
35 35 #
36 36 # The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
37 37 # each revision in the repository. The file is effectively an array of
38 38 # fixed length records. Read the docs for "hgtagsfnodescache" for technical
39 39 # details.
40 40 #
41 41 # The .hgtags filenode cache grows in proportion to the length of the
42 42 # changelog. The file is truncated when the changelog is stripped.
43 43 #
44 44 # The purpose of the filenode cache is to avoid the most expensive part
45 45 # of finding global tags, which is looking up the .hgtags filenode in the
46 46 # manifest for each head. This can take dozens of milliseconds, or over
47 47 # 100ms for repositories with very large manifests. Multiplied by dozens
48 48 # or even hundreds of heads, this becomes a significant performance concern.
49 49 #
50 50 # There also exists a separate cache file for each repository filter.
51 51 # These "tags-*" files store information about the history of tags.
52 52 #
53 53 # The tags cache files consist of a cache validation line followed by
54 54 # a history of tags.
55 55 #
56 56 # The cache validation line has the format:
57 57 #
58 58 # <tiprev> <tipnode> [<filteredhash>]
59 59 #
60 60 # <tiprev> is an integer revision and <tipnode> is a 40 character hex
61 61 # node for that changeset. These redundantly identify the repository
62 62 # tip from the time the cache was written. In addition, <filteredhash>,
63 63 # if present, is a 40 character hex hash of the contents of the filtered
64 64 # revisions for this filter. If the set of filtered revs changes, the
65 65 # hash will change and invalidate the cache.
66 66 #
67 67 # The history part of the tags cache consists of lines of the form:
68 68 #
69 69 # <node> <tag>
70 70 #
71 71 # (This format is identical to that of .hgtags files.)
72 72 #
73 73 # <tag> is the tag name and <node> is the 40 character hex changeset
74 74 # the tag is associated with.
75 75 #
76 76 # Tags are written sorted by tag name.
77 77 #
78 78 # Tags associated with multiple changesets have an entry for each changeset.
79 79 # The most recent changeset (in terms of revlog ordering for the head
80 80 # setting it) for each tag is last.
81 81
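A hedged sketch of parsing the layout documented above; the sample lines are invented rather than read from a real tags2 cache file:

    sample = ('123 ' + 'a' * 40 + ' ' + 'b' * 40 + '\n' +  # validation line
              'c' * 40 + ' v1.0\n')                        # history line
    lines = sample.splitlines()
    fields = lines[0].split()
    tiprev, tipnode = int(fields[0]), fields[1]
    filteredhash = fields[2] if len(fields) > 2 else None
    node, tag = lines[1].split(' ', 1)
    assert (tiprev, tag) == (123, 'v1.0')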
82 82 def findglobaltags(ui, repo, alltags, tagtypes):
83 83 '''Find global tags in a repo.
84 84
85 85 "alltags" maps tag name to (node, hist) 2-tuples.
86 86
87 87 "tagtypes" maps tag name to tag type. Global tags always have the
88 88 "global" tag type.
89 89
90 90 The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
91 91 should be passed in.
92 92
93 93 The tags cache is read and updated as a side-effect of calling.
94 94 '''
95 95 # This is so we can be lazy and assume alltags contains only global
96 96 # tags when we pass it to _writetagcache().
97 97 assert len(alltags) == len(tagtypes) == 0, \
98 98 "findglobaltags() should be called first"
99 99
100 100 (heads, tagfnode, valid, cachetags, shouldwrite) = _readtagcache(ui, repo)
101 101 if cachetags is not None:
102 102 assert not shouldwrite
103 103 # XXX is this really 100% correct? are there oddball special
104 104 # cases where a global tag should outrank a local tag but won't,
105 105 # because cachetags does not contain rank info?
106 106 _updatetags(cachetags, 'global', alltags, tagtypes)
107 107 return
108 108
109 109 seen = set() # set of fnode
110 110 fctx = None
111 111 for head in reversed(heads): # oldest to newest
112 112 assert head in repo.changelog.nodemap, \
113 113 "tag cache returned bogus head %s" % short(head)
114 114
115 115 fnode = tagfnode.get(head)
116 116 if fnode and fnode not in seen:
117 117 seen.add(fnode)
118 118 if not fctx:
119 119 fctx = repo.filectx('.hgtags', fileid=fnode)
120 120 else:
121 121 fctx = fctx.filectx(fnode)
122 122
123 123 filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
124 124 _updatetags(filetags, 'global', alltags, tagtypes)
125 125
126 126 # and update the cache (if necessary)
127 127 if shouldwrite:
128 128 _writetagcache(ui, repo, valid, alltags)
129 129
130 130 def readlocaltags(ui, repo, alltags, tagtypes):
131 131 '''Read local tags in repo. Update alltags and tagtypes.'''
132 132 try:
133 133 data = repo.vfs.read("localtags")
134 134 except IOError as inst:
135 135 if inst.errno != errno.ENOENT:
136 136 raise
137 137 return
138 138
139 139 # localtags is in the local encoding; re-encode to UTF-8 on
140 140 # input for consistency with the rest of this module.
141 141 filetags = _readtags(
142 142 ui, repo, data.splitlines(), "localtags",
143 143 recode=encoding.fromlocal)
144 144
145 145 # remove tags pointing to invalid nodes
146 146 cl = repo.changelog
147 147 for t in filetags.keys():
148 148 try:
149 149 cl.rev(filetags[t][0])
150 150 except (LookupError, ValueError):
151 151 del filetags[t]
152 152
153 153 _updatetags(filetags, "local", alltags, tagtypes)
154 154
155 155 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
156 156 '''Read tag definitions from a file (or any source of lines).
157 157
158 158 This function returns two sortdicts with similar information:
159 159
160 160 - the first dict, bintaghist, contains the tag information as expected by
161 161 the _readtags function, i.e. a mapping from tag name to (node, hist):
162 162 - node is the node id from the last line read for that name,
163 163 - hist is the list of node ids previously associated with it (in file
164 164 order). All node ids are binary, not hex.
165 165
166 166 - the second dict, hextaglines, is a mapping from tag name to a list of
167 167 [hexnode, line number] pairs, ordered from the oldest to the newest node.
168 168
169 169 When calcnodelines is False the hextaglines dict is not calculated (an
170 170 empty dict is returned). This is done to improve this function's
171 171 performance in cases where the line numbers are not needed.
172 172 '''
173 173
174 174 bintaghist = util.sortdict()
175 175 hextaglines = util.sortdict()
176 176 count = 0
177 177
178 178 def dbg(msg):
179 179 ui.debug("%s, line %s: %s\n" % (fn, count, msg))
180 180
181 181 for nline, line in enumerate(lines):
182 182 count += 1
183 183 if not line:
184 184 continue
185 185 try:
186 186 (nodehex, name) = line.split(" ", 1)
187 187 except ValueError:
188 188 dbg("cannot parse entry")
189 189 continue
190 190 name = name.strip()
191 191 if recode:
192 192 name = recode(name)
193 193 try:
194 194 nodebin = bin(nodehex)
195 195 except TypeError:
196 196 dbg("node '%s' is not well formed" % nodehex)
197 197 continue
198 198
199 199 # update filetags
200 200 if calcnodelines:
201 201 # map tag name to a list of line numbers
202 202 if name not in hextaglines:
203 203 hextaglines[name] = []
204 204 hextaglines[name].append([nodehex, nline])
205 205 continue
206 206 # map tag name to (node, hist)
207 207 if name not in bintaghist:
208 208 bintaghist[name] = []
209 209 bintaghist[name].append(nodebin)
210 210 return bintaghist, hextaglines
211 211
212 212 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
213 213 '''Read tag definitions from a file (or any source of lines).
214 214
215 215 Returns a mapping from tag name to (node, hist).
216 216
217 217 "node" is the node id from the last line read for that name. "hist"
218 218 is the list of node ids previously associated with it (in file order).
219 219 All node ids are binary, not hex.
220 220 '''
221 221 filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
222 222 calcnodelines=calcnodelines)
223 223 # util.sortdict().__setitem__ is much slower at replacing than inserting
224 224 # new entries. The difference can matter if there are thousands of tags.
225 225 # Create a new sortdict to avoid the performance penalty.
226 226 newtags = util.sortdict()
227 227 for tag, taghist in filetags.items():
228 228 newtags[tag] = (taghist[-1], taghist[:-1])
229 229 return newtags
230 230
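The reshaping at the end of _readtags is worth seeing on toy data: the last node read for a tag becomes its current node, the earlier ones its history (the ids below are fake):

    taghist = ['node-old', 'node-mid', 'node-new']  # file order
    node, hist = taghist[-1], taghist[:-1]
    assert node == 'node-new' and hist == ['node-old', 'node-mid']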
231 231 def _updatetags(filetags, tagtype, alltags, tagtypes):
232 232 '''Incorporate the tag info read from one file into the two
233 233 dictionaries, alltags and tagtypes, that contain all tag
234 234 info (global across all heads plus local).'''
235 235
236 236 for name, nodehist in filetags.iteritems():
237 237 if name not in alltags:
238 238 alltags[name] = nodehist
239 239 tagtypes[name] = tagtype
240 240 continue
241 241
242 242 # we prefer alltags[name] if:
243 243 # it supersedes us OR
244 244 # it and we mutually supersede and it has a higher rank
245 245 # otherwise we win because we're tip-most
246 246 anode, ahist = nodehist
247 247 bnode, bhist = alltags[name]
248 248 if (bnode != anode and anode in bhist and
249 249 (bnode not in ahist or len(bhist) > len(ahist))):
250 250 anode = bnode
251 251 else:
252 252 tagtypes[name] = tagtype
253 253 ahist.extend([n for n in bhist if n not in ahist])
254 254 alltags[name] = anode, ahist
255 255
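A worked toy example of the precedence rule above, with fake node ids: the incoming entry loses when its node already appears in the existing entry's history and the existing node is not superseded in turn:

    anode, ahist = 'n2', ['n1']         # incoming: tag last moved to n2
    bnode, bhist = 'n3', ['n1', 'n2']   # existing: tag later moved to n3
    existing_wins = (bnode != anode and anode in bhist and
                     (bnode not in ahist or len(bhist) > len(ahist)))
    assert existing_wins                # n3 supersedes n2, so n3 is kept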
256 256 def _filename(repo):
257 257 """name of a tagcache file for a given repo or repoview"""
258 258 filename = 'cache/tags2'
259 259 if repo.filtername:
260 260 filename = '%s-%s' % (filename, repo.filtername)
261 261 return filename
262 262
263 263 def _readtagcache(ui, repo):
264 264 '''Read the tag cache.
265 265
266 266 Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).
267 267
268 268 If the cache is completely up-to-date, "cachetags" is a dict of the
269 269 form returned by _readtags() and "heads", "fnodes", and "validinfo" are
270 270 None and "shouldwrite" is False.
271 271
272 272 If the cache is not up to date, "cachetags" is None. "heads" is a list
273 273 of all heads currently in the repository, ordered from tip to oldest.
274 274 "validinfo" is a tuple describing cache validation info. This is used
275 275 when writing the tags cache. "fnodes" is a mapping from head to .hgtags
276 276 filenode. "shouldwrite" is True.
277 277
278 278 If the cache is not up to date, the caller is responsible for reading tag
279 279 info from each returned head. (See findglobaltags().)
280 280 '''
281 281 try:
282 282 cachefile = repo.vfs(_filename(repo), 'r')
283 283 # force reading the file for static-http
284 284 cachelines = iter(cachefile)
285 285 except IOError:
286 286 cachefile = None
287 287
288 288 cacherev = None
289 289 cachenode = None
290 290 cachehash = None
291 291 if cachefile:
292 292 try:
293 293 validline = next(cachelines)
294 294 validline = validline.split()
295 295 cacherev = int(validline[0])
296 296 cachenode = bin(validline[1])
297 297 if len(validline) > 2:
298 298 cachehash = bin(validline[2])
299 299 except Exception:
300 300 # corruption of the cache, just recompute it.
301 301 pass
302 302
303 303 tipnode = repo.changelog.tip()
304 304 tiprev = len(repo.changelog) - 1
305 305
306 306 # Case 1 (common): tip is the same, so nothing has changed.
307 307 # (Unchanged tip trivially means no changesets have been added.
308 308 # But, thanks to localrepository.destroyed(), it also means none
309 309 # have been destroyed by strip or rollback.)
310 310 if (cacherev == tiprev
311 311 and cachenode == tipnode
312 312 and cachehash == scmutil.filteredhash(repo, tiprev)):
313 313 tags = _readtags(ui, repo, cachelines, cachefile.name)
314 314 cachefile.close()
315 315 return (None, None, None, tags, False)
316 316 if cachefile:
317 317 cachefile.close() # ignore rest of file
318 318
319 319 valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))
320 320
321 321 repoheads = repo.heads()
322 322 # Case 2 (uncommon): empty repo; get out quickly and don't bother
323 323 # writing an empty cache.
324 324 if repoheads == [nullid]:
325 325 return ([], {}, valid, {}, False)
326 326
327 327 # Case 3 (uncommon): cache file missing or empty.
328 328
329 329 # Case 4 (uncommon): tip rev decreased. This should only happen
330 330 # when we're called from localrepository.destroyed(). Refresh the
331 331 # cache so future invocations will not see disappeared heads in the
332 332 # cache.
333 333
334 334 # Case 5 (common): tip has changed, so we've added/replaced heads.
335 335
336 336 # As it happens, the code to handle cases 3, 4, 5 is the same.
337 337
338 338 # N.B. in case 4 (nodes destroyed), "new head" really means "newly
339 339 # exposed".
340 340 if not len(repo.file('.hgtags')):
341 341 # No tags have ever been committed, so we can avoid a
342 342 # potentially expensive search.
343 343 return ([], {}, valid, None, True)
344 344
345 345 starttime = util.timer()
346 346
347 347 # Now we have to look up the .hgtags filenode for every new head.
348 348 # This is the most expensive part of finding tags, so performance
349 349 # depends primarily on the size of newheads. Worst case: no cache
350 350 # file, so newheads == repoheads.
351 351 fnodescache = hgtagsfnodescache(repo.unfiltered())
352 352 cachefnode = {}
353 353 for head in reversed(repoheads):
354 354 fnode = fnodescache.getfnode(head)
355 355 if fnode != nullid:
356 356 cachefnode[head] = fnode
357 357
358 358 fnodescache.write()
359 359
360 360 duration = util.timer() - starttime
361 361 ui.log('tagscache',
362 362 '%d/%d cache hits/lookups in %0.4f '
363 363 'seconds\n',
364 364 fnodescache.hitcount, fnodescache.lookupcount, duration)
365 365
366 366 # Caller has to iterate over all heads, but can use the filenodes in
367 367 # cachefnode to get to each .hgtags revision quickly.
368 368 return (repoheads, cachefnode, valid, None, True)
369 369
370 370 def _writetagcache(ui, repo, valid, cachetags):
371 371 filename = _filename(repo)
372 372 try:
373 373 cachefile = repo.vfs(filename, 'w', atomictemp=True)
374 374 except (OSError, IOError):
375 375 return
376 376
377 377 ui.log('tagscache', 'writing .hg/%s with %d tags\n',
378 378 filename, len(cachetags))
379 379
380 380 if valid[2]:
381 381 cachefile.write('%d %s %s\n' % (valid[0], hex(valid[1]), hex(valid[2])))
382 382 else:
383 383 cachefile.write('%d %s\n' % (valid[0], hex(valid[1])))
384 384
385 385 # Tag names in the cache are in UTF-8 -- which is the whole reason
386 386 # we keep them in UTF-8 throughout this module. If we converted
387 387 # them local encoding on input, we would lose info writing them to
388 388 # the cache.
389 389 for (name, (node, hist)) in sorted(cachetags.iteritems()):
390 390 for n in hist:
391 391 cachefile.write("%s %s\n" % (hex(n), name))
392 392 cachefile.write("%s %s\n" % (hex(node), name))
393 393
394 394 try:
395 395 cachefile.close()
396 396 except (OSError, IOError):
397 397 pass
398 398
399 399 _fnodescachefile = 'cache/hgtagsfnodes1'
400 400 _fnodesrecsize = 4 + 20 # changeset fragment + filenode
401 401 _fnodesmissingrec = '\xff' * 24
402 402
403 403 class hgtagsfnodescache(object):
404 404 """Persistent cache mapping revisions to .hgtags filenodes.
405 405
406 406 The cache is an array of records. Each item in the array corresponds to
407 407 a changelog revision. Values in the array contain the first 4 bytes of
408 408 the node hash and the 20-byte .hgtags filenode for that revision.
409 409
410 410 The first 4 bytes are present as a form of verification. Repository
411 411 stripping and rewriting may change the node at a numeric revision in the
412 412 changelog. The changeset fragment serves as a verifier to detect
413 413 rewriting. This logic is shared with the rev branch cache (see
414 414 branchmap.py).
415 415
416 416 The instance holds in memory the full cache content but entries are
417 417 only parsed on read.
418 418
419 419 Lookups go through ``getfnode(node)``, where node identifies a
420 420 changeset. Missing entries are computed and populated on access.
421 421 """
422 422 def __init__(self, repo):
423 423 assert repo.filtername is None
424 424
425 425 self._repo = repo
426 426
427 427 # Only for reporting purposes.
428 428 self.lookupcount = 0
429 429 self.hitcount = 0
430 430
431 self._raw = array('c')
432 431
433 432 try:
434 433 data = repo.vfs.read(_fnodescachefile)
435 434 except (OSError, IOError):
436 435 data = ""
437 self._raw.fromstring(data)
436 self._raw = bytearray(data)
438 437
439 438 # The end state of self._raw is an array that is of the exact length
440 439 # required to hold a record for every revision in the repository.
441 440 # We truncate or extend the array as necessary. self._dirtyoffset is
442 441 # defined to be the start offset at which we need to write the output
443 442 # file. This offset is also adjusted when new entries are calculated
444 443 # for array members.
445 444 cllen = len(repo.changelog)
446 445 wantedlen = cllen * _fnodesrecsize
447 446 rawlen = len(self._raw)
448 447
449 448 self._dirtyoffset = None
450 449
451 450 if rawlen < wantedlen:
452 451 self._dirtyoffset = rawlen
453 452 self._raw.extend('\xff' * (wantedlen - rawlen))
454 453 elif rawlen > wantedlen:
455 454 # There's no easy way to truncate array instances. This seems
456 455 # slightly less evil than copying a potentially large array slice.
457 456 for i in range(rawlen - wantedlen):
458 457 self._raw.pop()
459 458 self._dirtyoffset = len(self._raw)
460 459
461 460 def getfnode(self, node, computemissing=True):
462 461 """Obtain the filenode of the .hgtags file at a specified revision.
463 462
464 463 If the value is in the cache, the entry will be validated and returned.
465 464 Otherwise, the filenode will be computed and returned unless
466 465 "computemissing" is False, in which case None will be returned without
467 466 any potentially expensive computation being performed.
468 467
469 468 If an .hgtags does not exist at the specified revision, nullid is
470 469 returned.
471 470 """
472 471 ctx = self._repo[node]
473 472 rev = ctx.rev()
474 473
475 474 self.lookupcount += 1
476 475
477 476 offset = rev * _fnodesrecsize
478 record = self._raw[offset:offset + _fnodesrecsize].tostring()
477 record = '%s' % self._raw[offset:offset + _fnodesrecsize]
479 478 properprefix = node[0:4]
480 479
481 480 # Validate and return existing entry.
482 481 if record != _fnodesmissingrec:
483 482 fileprefix = record[0:4]
484 483
485 484 if fileprefix == properprefix:
486 485 self.hitcount += 1
487 486 return record[4:]
488 487
489 488 # Fall through.
490 489
491 490 # If we get here, the entry is either missing or invalid.
492 491
493 492 if not computemissing:
494 493 return None
495 494
496 495 # Populate missing entry.
497 496 try:
498 497 fnode = ctx.filenode('.hgtags')
499 498 except error.LookupError:
500 499 # No .hgtags file on this revision.
501 500 fnode = nullid
502 501
503 502 self._writeentry(offset, properprefix, fnode)
504 503 return fnode
505 504
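A sketch of the fixed-width record math used by getfnode, with the sizes taken from the constants above and fabricated node values (Python 2 string semantics, matching this module):

    recsize = 4 + 20                       # changeset fragment + filenode
    raw = bytearray('\xff' * recsize * 3)  # three missing records
    node, fnode = 'N' * 20, 'F' * 20       # fake changeset node / filenode
    offset = 2 * recsize                   # record slot for rev 2
    raw[offset:offset + recsize] = bytearray(node[0:4] + fnode)
    record = '%s' % raw[offset:offset + recsize]
    assert record[0:4] == node[0:4] and record[4:] == fnode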
506 505 def setfnode(self, node, fnode):
507 506 """Set the .hgtags filenode for a given changeset."""
508 507 assert len(fnode) == 20
509 508 ctx = self._repo[node]
510 509
511 510 # Do a lookup first to avoid writing if nothing has changed.
512 511 if self.getfnode(ctx.node(), computemissing=False) == fnode:
513 512 return
514 513
515 514 self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)
516 515
517 516 def _writeentry(self, offset, prefix, fnode):
518 517 # Slices on array instances only accept other array.
519 entry = array('c', prefix + fnode)
518 entry = bytearray(prefix + fnode)
520 519 self._raw[offset:offset + _fnodesrecsize] = entry
521 520 # self._dirtyoffset could be None.
522 521 self._dirtyoffset = min(self._dirtyoffset, offset) or 0
523 522
524 523 def write(self):
525 524 """Perform all necessary writes to cache file.
526 525
527 526 This may no-op if no writes are needed or if a write lock could
528 527 not be obtained.
529 528 """
530 529 if self._dirtyoffset is None:
531 530 return
532 531
533 532 data = self._raw[self._dirtyoffset:]
534 533 if not data:
535 534 return
536 535
537 536 repo = self._repo
538 537
539 538 try:
540 539 lock = repo.wlock(wait=False)
541 540 except error.LockError:
542 541 repo.ui.log('tagscache',
543 542 'not writing .hg/%s because lock cannot be acquired\n' %
544 543 (_fnodescachefile))
545 544 return
546 545
547 546 try:
548 547 f = repo.vfs.open(_fnodescachefile, 'ab')
549 548 try:
550 549 # if the file has been truncated
551 550 actualoffset = f.tell()
552 551 if actualoffset < self._dirtyoffset:
553 552 self._dirtyoffset = actualoffset
554 553 data = self._raw[self._dirtyoffset:]
555 554 f.seek(self._dirtyoffset)
556 555 f.truncate()
557 556 repo.ui.log('tagscache',
558 557 'writing %d bytes to %s\n' % (
559 558 len(data), _fnodescachefile))
560 559 f.write(data)
561 560 self._dirtyoffset = None
562 561 finally:
563 562 f.close()
564 563 except (IOError, OSError) as inst:
565 564 repo.ui.log('tagscache',
566 565 "couldn't write %s: %s\n" % (
567 566 _fnodescachefile, inst))
568 567 finally:
569 568 lock.release()