##// END OF EJS Templates
narrow: move manifestlog overrides to core...
Martin von Zweigbergk -
r37392:ac42e39b default
parent child Browse files
Show More
@@ -1,81 +1,75 b''
1 1 # narrowrepo.py - repository which supports narrow revlogs, lazy loading
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial import (
11 11 bundlerepo,
12 12 changegroup,
13 13 hg,
14 14 localrepo,
15 15 narrowspec,
16 16 scmutil,
17 17 )
18 18
19 19 from . import (
20 20 narrowrevlog,
21 21 )
22 22
23 23 def wrappostshare(orig, sourcerepo, destrepo, **kwargs):
24 24 orig(sourcerepo, destrepo, **kwargs)
25 25 if changegroup.NARROW_REQUIREMENT in sourcerepo.requirements:
26 26 with destrepo.wlock():
27 27 with destrepo.vfs('shared', 'a') as fp:
28 28 fp.write(narrowspec.FILENAME + '\n')
29 29
30 30 def unsharenarrowspec(orig, ui, repo, repopath):
31 31 if (changegroup.NARROW_REQUIREMENT in repo.requirements
32 32 and repo.path == repopath and repo.shared()):
33 33 srcrepo = hg.sharedreposource(repo)
34 34 with srcrepo.vfs(narrowspec.FILENAME) as f:
35 35 spec = f.read()
36 36 with repo.vfs(narrowspec.FILENAME, 'w') as f:
37 37 f.write(spec)
38 38 return orig(ui, repo, repopath)
39 39
40 40 def wraprepo(repo):
41 41 """Enables narrow clone functionality on a single local repository."""
42 42
43 43 cacheprop = localrepo.storecache
44 44 if isinstance(repo, bundlerepo.bundlerepository):
45 45 # We have to use a different caching property decorator for
46 46 # bundlerepo because storecache blows up in strange ways on a
47 47 # bundlerepo. Fortunately, there's no risk of data changing in
48 48 # a bundlerepo.
49 49 cacheprop = lambda name: localrepo.unfilteredpropertycache
50 50
51 51 class narrowrepository(repo.__class__):
52 52
53 @cacheprop('00manifest.i')
54 def manifestlog(self):
55 mfl = super(narrowrepository, self).manifestlog
56 narrowrevlog.makenarrowmanifestlog(mfl, self)
57 return mfl
58
59 53 def file(self, f):
60 54 fl = super(narrowrepository, self).file(f)
61 55 narrowrevlog.makenarrowfilelog(fl, self.narrowmatch())
62 56 return fl
63 57
64 58 # I'm not sure this is the right place to do this filter.
65 59 # context._manifestmatches() would probably be better, or perhaps
66 60 # move it to a later place, in case some of the callers do want to know
67 61 # which directories changed. This seems to work for now, though.
68 62 def status(self, *args, **kwargs):
69 63 s = super(narrowrepository, self).status(*args, **kwargs)
70 64 narrowmatch = self.narrowmatch()
71 65 modified = list(filter(narrowmatch, s.modified))
72 66 added = list(filter(narrowmatch, s.added))
73 67 removed = list(filter(narrowmatch, s.removed))
74 68 deleted = list(filter(narrowmatch, s.deleted))
75 69 unknown = list(filter(narrowmatch, s.unknown))
76 70 ignored = list(filter(narrowmatch, s.ignored))
77 71 clean = list(filter(narrowmatch, s.clean))
78 72 return scmutil.status(modified, added, removed, deleted, unknown,
79 73 ignored, clean)
80 74
81 75 repo.__class__ = narrowrepository
@@ -1,89 +1,80 b''
1 1 # narrowrevlog.py - revlog storing irrelevant nodes as "ellipsis" nodes
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial import (
11 manifest,
12 11 revlog,
13 12 util,
14 13 )
15 14
16 15 def readtransform(self, text):
17 16 return text, False
18 17
19 18 def writetransform(self, text):
20 19 return text, False
21 20
22 21 def rawtransform(self, text):
23 22 return False
24 23
25 24 revlog.addflagprocessor(revlog.REVIDX_ELLIPSIS,
26 25 (readtransform, writetransform, rawtransform))
27 26
28 27 def setup():
29 28 # We just wanted to add the flag processor, which is done at module
30 29 # load time.
31 30 pass
32 31
33 def makenarrowmanifestlog(mfl, repo):
34 class narrowmanifestlog(mfl.__class__):
35 def get(self, dir, node, verify=True):
36 if not repo.narrowmatch().visitdir(dir[:-1] or '.'):
37 return manifest.excludeddirmanifestctx(dir, node)
38 return super(narrowmanifestlog, self).get(dir, node, verify=verify)
39 mfl.__class__ = narrowmanifestlog
40
41 32 def makenarrowfilelog(fl, narrowmatch):
42 33 class narrowfilelog(fl.__class__):
43 34 def renamed(self, node):
44 35 # Renames that come from outside the narrowspec are
45 36 # problematic at least for git-diffs, because we lack the
46 37 # base text for the rename. This logic was introduced in
47 38 # 3cd72b1 of narrowhg (authored by martinvonz, reviewed by
48 39 # adgar), but that revision doesn't have any additional
49 40 # commentary on what problems we can encounter.
50 41 m = super(narrowfilelog, self).renamed(node)
51 42 if m and not narrowmatch(m[0]):
52 43 return None
53 44 return m
54 45
55 46 def size(self, rev):
56 47 # We take advantage of the fact that remotefilelog
57 48 # lacks a node() method to just skip the
58 49 # rename-checking logic when on remotefilelog. This
59 50 # might be incorrect on other non-revlog-based storage
60 51 # engines, but for now this seems to be fine.
61 52 #
62 53 # TODO: when remotefilelog is in core, improve this to
63 54 # explicitly look for remotefilelog instead of cheating
64 55 # with a hasattr check.
65 56 if util.safehasattr(self, 'node'):
66 57 node = self.node(rev)
67 58 # Because renamed() is overridden above to
68 59 # sometimes return None even if there is metadata
69 60 # in the revlog, size can be incorrect for
70 61 # copies/renames, so we need to make sure we call
71 62 # the super class's implementation of renamed()
72 63 # for the purpose of size calculation.
73 64 if super(narrowfilelog, self).renamed(node):
74 65 return len(self.read(node))
75 66 return super(narrowfilelog, self).size(rev)
76 67
77 68 def cmp(self, node, text):
78 69 different = super(narrowfilelog, self).cmp(node, text)
79 70 if different:
80 71 # Similar to size() above, if the file was copied from
81 72 # a file outside the narrowspec, the super class's
82 73 # would have returned True because we tricked it into
83 74 # thinking that the file was not renamed.
84 75 if super(narrowfilelog, self).renamed(node):
85 76 t2 = self.read(node)
86 77 return t2 != text
87 78 return different
88 79
89 80 fl.__class__ = narrowfilelog
@@ -1,1653 +1,1656 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 bin,
17 17 hex,
18 18 )
19 19 from . import (
20 20 error,
21 21 mdiff,
22 22 policy,
23 23 revlog,
24 24 util,
25 25 )
26 26
27 27 parsers = policy.importmod(r'parsers')
28 28 propertycache = util.propertycache
29 29
30 30 def _parse(data):
31 31 # This method does a little bit of excessive-looking
32 32 # precondition checking. This is so that the behavior of this
33 33 # class exactly matches its C counterpart to try and help
34 34 # prevent surprise breakage for anyone that develops against
35 35 # the pure version.
36 36 if data and data[-1:] != '\n':
37 37 raise ValueError('Manifest did not end in a newline.')
38 38 prev = None
39 39 for l in data.splitlines():
40 40 if prev is not None and prev > l:
41 41 raise ValueError('Manifest lines not in sorted order.')
42 42 prev = l
43 43 f, n = l.split('\0')
44 44 if len(n) > 40:
45 45 yield f, bin(n[:40]), n[40:]
46 46 else:
47 47 yield f, bin(n), ''
48 48
49 49 def _text(it):
50 50 files = []
51 51 lines = []
52 52 _hex = revlog.hex
53 53 for f, n, fl in it:
54 54 files.append(f)
55 55 # if this is changed to support newlines in filenames,
56 56 # be sure to check the templates/ dir again (especially *-raw.tmpl)
57 57 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
58 58
59 59 _checkforbidden(files)
60 60 return ''.join(lines)
61 61
62 62 class lazymanifestiter(object):
63 63 def __init__(self, lm):
64 64 self.pos = 0
65 65 self.lm = lm
66 66
67 67 def __iter__(self):
68 68 return self
69 69
70 70 def next(self):
71 71 try:
72 72 data, pos = self.lm._get(self.pos)
73 73 except IndexError:
74 74 raise StopIteration
75 75 if pos == -1:
76 76 self.pos += 1
77 77 return data[0]
78 78 self.pos += 1
79 79 zeropos = data.find('\x00', pos)
80 80 return data[pos:zeropos]
81 81
82 82 __next__ = next
83 83
84 84 class lazymanifestiterentries(object):
85 85 def __init__(self, lm):
86 86 self.lm = lm
87 87 self.pos = 0
88 88
89 89 def __iter__(self):
90 90 return self
91 91
92 92 def next(self):
93 93 try:
94 94 data, pos = self.lm._get(self.pos)
95 95 except IndexError:
96 96 raise StopIteration
97 97 if pos == -1:
98 98 self.pos += 1
99 99 return data
100 100 zeropos = data.find('\x00', pos)
101 101 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
102 102 zeropos + 1, 40)
103 103 flags = self.lm._getflags(data, self.pos, zeropos)
104 104 self.pos += 1
105 105 return (data[pos:zeropos], hashval, flags)
106 106
107 107 __next__ = next
108 108
109 109 def unhexlify(data, extra, pos, length):
110 110 s = bin(data[pos:pos + length])
111 111 if extra:
112 112 s += chr(extra & 0xff)
113 113 return s
114 114
115 115 def _cmp(a, b):
116 116 return (a > b) - (a < b)
117 117
118 118 class _lazymanifest(object):
119 119 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
120 120 if positions is None:
121 121 self.positions = self.findlines(data)
122 122 self.extrainfo = [0] * len(self.positions)
123 123 self.data = data
124 124 self.extradata = []
125 125 else:
126 126 self.positions = positions[:]
127 127 self.extrainfo = extrainfo[:]
128 128 self.extradata = extradata[:]
129 129 self.data = data
130 130
131 131 def findlines(self, data):
132 132 if not data:
133 133 return []
134 134 pos = data.find("\n")
135 135 if pos == -1 or data[-1:] != '\n':
136 136 raise ValueError("Manifest did not end in a newline.")
137 137 positions = [0]
138 138 prev = data[:data.find('\x00')]
139 139 while pos < len(data) - 1 and pos != -1:
140 140 positions.append(pos + 1)
141 141 nexts = data[pos + 1:data.find('\x00', pos + 1)]
142 142 if nexts < prev:
143 143 raise ValueError("Manifest lines not in sorted order.")
144 144 prev = nexts
145 145 pos = data.find("\n", pos + 1)
146 146 return positions
147 147
148 148 def _get(self, index):
149 149 # get the position encoded in pos:
150 150 # positive number is an index in 'data'
151 151 # negative number is in extrapieces
152 152 pos = self.positions[index]
153 153 if pos >= 0:
154 154 return self.data, pos
155 155 return self.extradata[-pos - 1], -1
156 156
157 157 def _getkey(self, pos):
158 158 if pos >= 0:
159 159 return self.data[pos:self.data.find('\x00', pos + 1)]
160 160 return self.extradata[-pos - 1][0]
161 161
162 162 def bsearch(self, key):
163 163 first = 0
164 164 last = len(self.positions) - 1
165 165
166 166 while first <= last:
167 167 midpoint = (first + last)//2
168 168 nextpos = self.positions[midpoint]
169 169 candidate = self._getkey(nextpos)
170 170 r = _cmp(key, candidate)
171 171 if r == 0:
172 172 return midpoint
173 173 else:
174 174 if r < 0:
175 175 last = midpoint - 1
176 176 else:
177 177 first = midpoint + 1
178 178 return -1
179 179
180 180 def bsearch2(self, key):
181 181 # same as the above, but will always return the position
182 182 # done for performance reasons
183 183 first = 0
184 184 last = len(self.positions) - 1
185 185
186 186 while first <= last:
187 187 midpoint = (first + last)//2
188 188 nextpos = self.positions[midpoint]
189 189 candidate = self._getkey(nextpos)
190 190 r = _cmp(key, candidate)
191 191 if r == 0:
192 192 return (midpoint, True)
193 193 else:
194 194 if r < 0:
195 195 last = midpoint - 1
196 196 else:
197 197 first = midpoint + 1
198 198 return (first, False)
199 199
200 200 def __contains__(self, key):
201 201 return self.bsearch(key) != -1
202 202
203 203 def _getflags(self, data, needle, pos):
204 204 start = pos + 41
205 205 end = data.find("\n", start)
206 206 if end == -1:
207 207 end = len(data) - 1
208 208 if start == end:
209 209 return ''
210 210 return self.data[start:end]
211 211
212 212 def __getitem__(self, key):
213 213 if not isinstance(key, bytes):
214 214 raise TypeError("getitem: manifest keys must be a bytes.")
215 215 needle = self.bsearch(key)
216 216 if needle == -1:
217 217 raise KeyError
218 218 data, pos = self._get(needle)
219 219 if pos == -1:
220 220 return (data[1], data[2])
221 221 zeropos = data.find('\x00', pos)
222 222 assert 0 <= needle <= len(self.positions)
223 223 assert len(self.extrainfo) == len(self.positions)
224 224 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
225 225 flags = self._getflags(data, needle, zeropos)
226 226 return (hashval, flags)
227 227
228 228 def __delitem__(self, key):
229 229 needle, found = self.bsearch2(key)
230 230 if not found:
231 231 raise KeyError
232 232 cur = self.positions[needle]
233 233 self.positions = self.positions[:needle] + self.positions[needle + 1:]
234 234 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
235 235 if cur >= 0:
236 236 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
237 237
238 238 def __setitem__(self, key, value):
239 239 if not isinstance(key, bytes):
240 240 raise TypeError("setitem: manifest keys must be a byte string.")
241 241 if not isinstance(value, tuple) or len(value) != 2:
242 242 raise TypeError("Manifest values must be a tuple of (node, flags).")
243 243 hashval = value[0]
244 244 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
245 245 raise TypeError("node must be a 20-byte byte string")
246 246 flags = value[1]
247 247 if len(hashval) == 22:
248 248 hashval = hashval[:-1]
249 249 if not isinstance(flags, bytes) or len(flags) > 1:
250 250 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
251 251 needle, found = self.bsearch2(key)
252 252 if found:
253 253 # put the item
254 254 pos = self.positions[needle]
255 255 if pos < 0:
256 256 self.extradata[-pos - 1] = (key, hashval, value[1])
257 257 else:
258 258 # just don't bother
259 259 self.extradata.append((key, hashval, value[1]))
260 260 self.positions[needle] = -len(self.extradata)
261 261 else:
262 262 # not found, put it in with extra positions
263 263 self.extradata.append((key, hashval, value[1]))
264 264 self.positions = (self.positions[:needle] + [-len(self.extradata)]
265 265 + self.positions[needle:])
266 266 self.extrainfo = (self.extrainfo[:needle] + [0] +
267 267 self.extrainfo[needle:])
268 268
269 269 def copy(self):
270 270 # XXX call _compact like in C?
271 271 return _lazymanifest(self.data, self.positions, self.extrainfo,
272 272 self.extradata)
273 273
274 274 def _compact(self):
275 275 # hopefully not called TOO often
276 276 if len(self.extradata) == 0:
277 277 return
278 278 l = []
279 279 last_cut = 0
280 280 i = 0
281 281 offset = 0
282 282 self.extrainfo = [0] * len(self.positions)
283 283 while i < len(self.positions):
284 284 if self.positions[i] >= 0:
285 285 cur = self.positions[i]
286 286 last_cut = cur
287 287 while True:
288 288 self.positions[i] = offset
289 289 i += 1
290 290 if i == len(self.positions) or self.positions[i] < 0:
291 291 break
292 292 offset += self.positions[i] - cur
293 293 cur = self.positions[i]
294 294 end_cut = self.data.find('\n', cur)
295 295 if end_cut != -1:
296 296 end_cut += 1
297 297 offset += end_cut - cur
298 298 l.append(self.data[last_cut:end_cut])
299 299 else:
300 300 while i < len(self.positions) and self.positions[i] < 0:
301 301 cur = self.positions[i]
302 302 t = self.extradata[-cur - 1]
303 303 l.append(self._pack(t))
304 304 self.positions[i] = offset
305 305 if len(t[1]) > 20:
306 306 self.extrainfo[i] = ord(t[1][21])
307 307 offset += len(l[-1])
308 308 i += 1
309 309 self.data = ''.join(l)
310 310 self.extradata = []
311 311
312 312 def _pack(self, d):
313 313 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
314 314
315 315 def text(self):
316 316 self._compact()
317 317 return self.data
318 318
319 319 def diff(self, m2, clean=False):
320 320 '''Finds changes between the current manifest and m2.'''
321 321 # XXX think whether efficiency matters here
322 322 diff = {}
323 323
324 324 for fn, e1, flags in self.iterentries():
325 325 if fn not in m2:
326 326 diff[fn] = (e1, flags), (None, '')
327 327 else:
328 328 e2 = m2[fn]
329 329 if (e1, flags) != e2:
330 330 diff[fn] = (e1, flags), e2
331 331 elif clean:
332 332 diff[fn] = None
333 333
334 334 for fn, e2, flags in m2.iterentries():
335 335 if fn not in self:
336 336 diff[fn] = (None, ''), (e2, flags)
337 337
338 338 return diff
339 339
340 340 def iterentries(self):
341 341 return lazymanifestiterentries(self)
342 342
343 343 def iterkeys(self):
344 344 return lazymanifestiter(self)
345 345
346 346 def __iter__(self):
347 347 return lazymanifestiter(self)
348 348
349 349 def __len__(self):
350 350 return len(self.positions)
351 351
352 352 def filtercopy(self, filterfn):
353 353 # XXX should be optimized
354 354 c = _lazymanifest('')
355 355 for f, n, fl in self.iterentries():
356 356 if filterfn(f):
357 357 c[f] = n, fl
358 358 return c
359 359
360 360 try:
361 361 _lazymanifest = parsers.lazymanifest
362 362 except AttributeError:
363 363 pass
364 364
365 365 class manifestdict(object):
366 366 def __init__(self, data=''):
367 367 self._lm = _lazymanifest(data)
368 368
369 369 def __getitem__(self, key):
370 370 return self._lm[key][0]
371 371
372 372 def find(self, key):
373 373 return self._lm[key]
374 374
375 375 def __len__(self):
376 376 return len(self._lm)
377 377
378 378 def __nonzero__(self):
379 379 # nonzero is covered by the __len__ function, but implementing it here
380 380 # makes it easier for extensions to override.
381 381 return len(self._lm) != 0
382 382
383 383 __bool__ = __nonzero__
384 384
385 385 def __setitem__(self, key, node):
386 386 self._lm[key] = node, self.flags(key, '')
387 387
388 388 def __contains__(self, key):
389 389 if key is None:
390 390 return False
391 391 return key in self._lm
392 392
393 393 def __delitem__(self, key):
394 394 del self._lm[key]
395 395
396 396 def __iter__(self):
397 397 return self._lm.__iter__()
398 398
399 399 def iterkeys(self):
400 400 return self._lm.iterkeys()
401 401
402 402 def keys(self):
403 403 return list(self.iterkeys())
404 404
405 405 def filesnotin(self, m2, match=None):
406 406 '''Set of files in this manifest that are not in the other'''
407 407 if match:
408 408 m1 = self.matches(match)
409 409 m2 = m2.matches(match)
410 410 return m1.filesnotin(m2)
411 411 diff = self.diff(m2)
412 412 files = set(filepath
413 413 for filepath, hashflags in diff.iteritems()
414 414 if hashflags[1][0] is None)
415 415 return files
416 416
417 417 @propertycache
418 418 def _dirs(self):
419 419 return util.dirs(self)
420 420
421 421 def dirs(self):
422 422 return self._dirs
423 423
424 424 def hasdir(self, dir):
425 425 return dir in self._dirs
426 426
427 427 def _filesfastpath(self, match):
428 428 '''Checks whether we can correctly and quickly iterate over matcher
429 429 files instead of over manifest files.'''
430 430 files = match.files()
431 431 return (len(files) < 100 and (match.isexact() or
432 432 (match.prefix() and all(fn in self for fn in files))))
433 433
434 434 def walk(self, match):
435 435 '''Generates matching file names.
436 436
437 437 Equivalent to manifest.matches(match).iterkeys(), but without creating
438 438 an entirely new manifest.
439 439
440 440 It also reports nonexistent files by marking them bad with match.bad().
441 441 '''
442 442 if match.always():
443 443 for f in iter(self):
444 444 yield f
445 445 return
446 446
447 447 fset = set(match.files())
448 448
449 449 # avoid the entire walk if we're only looking for specific files
450 450 if self._filesfastpath(match):
451 451 for fn in sorted(fset):
452 452 yield fn
453 453 return
454 454
455 455 for fn in self:
456 456 if fn in fset:
457 457 # specified pattern is the exact name
458 458 fset.remove(fn)
459 459 if match(fn):
460 460 yield fn
461 461
462 462 # for dirstate.walk, files=['.'] means "walk the whole tree".
463 463 # follow that here, too
464 464 fset.discard('.')
465 465
466 466 for fn in sorted(fset):
467 467 if not self.hasdir(fn):
468 468 match.bad(fn, None)
469 469
470 470 def matches(self, match):
471 471 '''generate a new manifest filtered by the match argument'''
472 472 if match.always():
473 473 return self.copy()
474 474
475 475 if self._filesfastpath(match):
476 476 m = manifestdict()
477 477 lm = self._lm
478 478 for fn in match.files():
479 479 if fn in lm:
480 480 m._lm[fn] = lm[fn]
481 481 return m
482 482
483 483 m = manifestdict()
484 484 m._lm = self._lm.filtercopy(match)
485 485 return m
486 486
487 487 def diff(self, m2, match=None, clean=False):
488 488 '''Finds changes between the current manifest and m2.
489 489
490 490 Args:
491 491 m2: the manifest to which this manifest should be compared.
492 492 clean: if true, include files unchanged between these manifests
493 493 with a None value in the returned dictionary.
494 494
495 495 The result is returned as a dict with filename as key and
496 496 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
497 497 nodeid in the current/other manifest and fl1/fl2 is the flag
498 498 in the current/other manifest. Where the file does not exist,
499 499 the nodeid will be None and the flags will be the empty
500 500 string.
501 501 '''
502 502 if match:
503 503 m1 = self.matches(match)
504 504 m2 = m2.matches(match)
505 505 return m1.diff(m2, clean=clean)
506 506 return self._lm.diff(m2._lm, clean)
507 507
508 508 def setflag(self, key, flag):
509 509 self._lm[key] = self[key], flag
510 510
511 511 def get(self, key, default=None):
512 512 try:
513 513 return self._lm[key][0]
514 514 except KeyError:
515 515 return default
516 516
517 517 def flags(self, key, default=''):
518 518 try:
519 519 return self._lm[key][1]
520 520 except KeyError:
521 521 return default
522 522
523 523 def copy(self):
524 524 c = manifestdict()
525 525 c._lm = self._lm.copy()
526 526 return c
527 527
528 528 def items(self):
529 529 return (x[:2] for x in self._lm.iterentries())
530 530
531 531 iteritems = items
532 532
533 533 def iterentries(self):
534 534 return self._lm.iterentries()
535 535
536 536 def text(self):
537 537 # most likely uses native version
538 538 return self._lm.text()
539 539
540 540 def fastdelta(self, base, changes):
541 541 """Given a base manifest text as a bytearray and a list of changes
542 542 relative to that text, compute a delta that can be used by revlog.
543 543 """
544 544 delta = []
545 545 dstart = None
546 546 dend = None
547 547 dline = [""]
548 548 start = 0
549 549 # zero copy representation of base as a buffer
550 550 addbuf = util.buffer(base)
551 551
552 552 changes = list(changes)
553 553 if len(changes) < 1000:
554 554 # start with a readonly loop that finds the offset of
555 555 # each line and creates the deltas
556 556 for f, todelete in changes:
557 557 # bs will either be the index of the item or the insert point
558 558 start, end = _msearch(addbuf, f, start)
559 559 if not todelete:
560 560 h, fl = self._lm[f]
561 561 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
562 562 else:
563 563 if start == end:
564 564 # item we want to delete was not found, error out
565 565 raise AssertionError(
566 566 _("failed to remove %s from manifest") % f)
567 567 l = ""
568 568 if dstart is not None and dstart <= start and dend >= start:
569 569 if dend < end:
570 570 dend = end
571 571 if l:
572 572 dline.append(l)
573 573 else:
574 574 if dstart is not None:
575 575 delta.append([dstart, dend, "".join(dline)])
576 576 dstart = start
577 577 dend = end
578 578 dline = [l]
579 579
580 580 if dstart is not None:
581 581 delta.append([dstart, dend, "".join(dline)])
582 582 # apply the delta to the base, and get a delta for addrevision
583 583 deltatext, arraytext = _addlistdelta(base, delta)
584 584 else:
585 585 # For large changes, it's much cheaper to just build the text and
586 586 # diff it.
587 587 arraytext = bytearray(self.text())
588 588 deltatext = mdiff.textdiff(
589 589 util.buffer(base), util.buffer(arraytext))
590 590
591 591 return arraytext, deltatext
592 592
593 593 def _msearch(m, s, lo=0, hi=None):
594 594 '''return a tuple (start, end) that says where to find s within m.
595 595
596 596 If the string is found m[start:end] are the line containing
597 597 that string. If start == end the string was not found and
598 598 they indicate the proper sorted insertion point.
599 599
600 600 m should be a buffer, a memoryview or a byte string.
601 601 s is a byte string'''
602 602 def advance(i, c):
603 603 while i < lenm and m[i:i + 1] != c:
604 604 i += 1
605 605 return i
606 606 if not s:
607 607 return (lo, lo)
608 608 lenm = len(m)
609 609 if not hi:
610 610 hi = lenm
611 611 while lo < hi:
612 612 mid = (lo + hi) // 2
613 613 start = mid
614 614 while start > 0 and m[start - 1:start] != '\n':
615 615 start -= 1
616 616 end = advance(start, '\0')
617 617 if bytes(m[start:end]) < s:
618 618 # we know that after the null there are 40 bytes of sha1
619 619 # this translates to the bisect lo = mid + 1
620 620 lo = advance(end + 40, '\n') + 1
621 621 else:
622 622 # this translates to the bisect hi = mid
623 623 hi = start
624 624 end = advance(lo, '\0')
625 625 found = m[lo:end]
626 626 if s == found:
627 627 # we know that after the null there are 40 bytes of sha1
628 628 end = advance(end + 40, '\n')
629 629 return (lo, end + 1)
630 630 else:
631 631 return (lo, lo)
632 632
633 633 def _checkforbidden(l):
634 634 """Check filenames for illegal characters."""
635 635 for f in l:
636 636 if '\n' in f or '\r' in f:
637 637 raise error.RevlogError(
638 638 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
639 639
640 640
641 641 # apply the changes collected during the bisect loop to our addlist
642 642 # return a delta suitable for addrevision
643 643 def _addlistdelta(addlist, x):
644 644 # for large addlist arrays, building a new array is cheaper
645 645 # than repeatedly modifying the existing one
646 646 currentposition = 0
647 647 newaddlist = bytearray()
648 648
649 649 for start, end, content in x:
650 650 newaddlist += addlist[currentposition:start]
651 651 if content:
652 652 newaddlist += bytearray(content)
653 653
654 654 currentposition = end
655 655
656 656 newaddlist += addlist[currentposition:]
657 657
658 658 deltatext = "".join(struct.pack(">lll", start, end, len(content))
659 659 + content for start, end, content in x)
660 660 return deltatext, newaddlist
661 661
662 662 def _splittopdir(f):
663 663 if '/' in f:
664 664 dir, subpath = f.split('/', 1)
665 665 return dir + '/', subpath
666 666 else:
667 667 return '', f
668 668
669 669 _noop = lambda s: None
670 670
671 671 class treemanifest(object):
672 672 def __init__(self, dir='', text=''):
673 673 self._dir = dir
674 674 self._node = revlog.nullid
675 675 self._loadfunc = _noop
676 676 self._copyfunc = _noop
677 677 self._dirty = False
678 678 self._dirs = {}
679 679 # Using _lazymanifest here is a little slower than plain old dicts
680 680 self._files = {}
681 681 self._flags = {}
682 682 if text:
683 683 def readsubtree(subdir, subm):
684 684 raise AssertionError('treemanifest constructor only accepts '
685 685 'flat manifests')
686 686 self.parse(text, readsubtree)
687 687 self._dirty = True # Mark flat manifest dirty after parsing
688 688
689 689 def _subpath(self, path):
690 690 return self._dir + path
691 691
692 692 def __len__(self):
693 693 self._load()
694 694 size = len(self._files)
695 695 for m in self._dirs.values():
696 696 size += m.__len__()
697 697 return size
698 698
699 699 def __nonzero__(self):
700 700 # Faster than "__len() != 0" since it avoids loading sub-manifests
701 701 return not self._isempty()
702 702
703 703 __bool__ = __nonzero__
704 704
705 705 def _isempty(self):
706 706 self._load() # for consistency; already loaded by all callers
707 707 return (not self._files and (not self._dirs or
708 708 all(m._isempty() for m in self._dirs.values())))
709 709
710 710 def __repr__(self):
711 711 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
712 712 (self._dir, revlog.hex(self._node),
713 713 bool(self._loadfunc is _noop),
714 714 self._dirty, id(self)))
715 715
716 716 def dir(self):
717 717 '''The directory that this tree manifest represents, including a
718 718 trailing '/'. Empty string for the repo root directory.'''
719 719 return self._dir
720 720
721 721 def node(self):
722 722 '''This node of this instance. nullid for unsaved instances. Should
723 723 be updated when the instance is read or written from a revlog.
724 724 '''
725 725 assert not self._dirty
726 726 return self._node
727 727
728 728 def setnode(self, node):
729 729 self._node = node
730 730 self._dirty = False
731 731
732 732 def iterentries(self):
733 733 self._load()
734 734 for p, n in sorted(itertools.chain(self._dirs.items(),
735 735 self._files.items())):
736 736 if p in self._files:
737 737 yield self._subpath(p), n, self._flags.get(p, '')
738 738 else:
739 739 for x in n.iterentries():
740 740 yield x
741 741
742 742 def items(self):
743 743 self._load()
744 744 for p, n in sorted(itertools.chain(self._dirs.items(),
745 745 self._files.items())):
746 746 if p in self._files:
747 747 yield self._subpath(p), n
748 748 else:
749 749 for f, sn in n.iteritems():
750 750 yield f, sn
751 751
752 752 iteritems = items
753 753
754 754 def iterkeys(self):
755 755 self._load()
756 756 for p in sorted(itertools.chain(self._dirs, self._files)):
757 757 if p in self._files:
758 758 yield self._subpath(p)
759 759 else:
760 760 for f in self._dirs[p]:
761 761 yield f
762 762
763 763 def keys(self):
764 764 return list(self.iterkeys())
765 765
766 766 def __iter__(self):
767 767 return self.iterkeys()
768 768
769 769 def __contains__(self, f):
770 770 if f is None:
771 771 return False
772 772 self._load()
773 773 dir, subpath = _splittopdir(f)
774 774 if dir:
775 775 if dir not in self._dirs:
776 776 return False
777 777 return self._dirs[dir].__contains__(subpath)
778 778 else:
779 779 return f in self._files
780 780
781 781 def get(self, f, default=None):
782 782 self._load()
783 783 dir, subpath = _splittopdir(f)
784 784 if dir:
785 785 if dir not in self._dirs:
786 786 return default
787 787 return self._dirs[dir].get(subpath, default)
788 788 else:
789 789 return self._files.get(f, default)
790 790
791 791 def __getitem__(self, f):
792 792 self._load()
793 793 dir, subpath = _splittopdir(f)
794 794 if dir:
795 795 return self._dirs[dir].__getitem__(subpath)
796 796 else:
797 797 return self._files[f]
798 798
799 799 def flags(self, f):
800 800 self._load()
801 801 dir, subpath = _splittopdir(f)
802 802 if dir:
803 803 if dir not in self._dirs:
804 804 return ''
805 805 return self._dirs[dir].flags(subpath)
806 806 else:
807 807 if f in self._dirs:
808 808 return ''
809 809 return self._flags.get(f, '')
810 810
811 811 def find(self, f):
812 812 self._load()
813 813 dir, subpath = _splittopdir(f)
814 814 if dir:
815 815 return self._dirs[dir].find(subpath)
816 816 else:
817 817 return self._files[f], self._flags.get(f, '')
818 818
819 819 def __delitem__(self, f):
820 820 self._load()
821 821 dir, subpath = _splittopdir(f)
822 822 if dir:
823 823 self._dirs[dir].__delitem__(subpath)
824 824 # If the directory is now empty, remove it
825 825 if self._dirs[dir]._isempty():
826 826 del self._dirs[dir]
827 827 else:
828 828 del self._files[f]
829 829 if f in self._flags:
830 830 del self._flags[f]
831 831 self._dirty = True
832 832
833 833 def __setitem__(self, f, n):
834 834 assert n is not None
835 835 self._load()
836 836 dir, subpath = _splittopdir(f)
837 837 if dir:
838 838 if dir not in self._dirs:
839 839 self._dirs[dir] = treemanifest(self._subpath(dir))
840 840 self._dirs[dir].__setitem__(subpath, n)
841 841 else:
842 842 self._files[f] = n[:21] # to match manifestdict's behavior
843 843 self._dirty = True
844 844
845 845 def _load(self):
846 846 if self._loadfunc is not _noop:
847 847 lf, self._loadfunc = self._loadfunc, _noop
848 848 lf(self)
849 849 elif self._copyfunc is not _noop:
850 850 cf, self._copyfunc = self._copyfunc, _noop
851 851 cf(self)
852 852
853 853 def setflag(self, f, flags):
854 854 """Set the flags (symlink, executable) for path f."""
855 855 self._load()
856 856 dir, subpath = _splittopdir(f)
857 857 if dir:
858 858 if dir not in self._dirs:
859 859 self._dirs[dir] = treemanifest(self._subpath(dir))
860 860 self._dirs[dir].setflag(subpath, flags)
861 861 else:
862 862 self._flags[f] = flags
863 863 self._dirty = True
864 864
865 865 def copy(self):
866 866 copy = treemanifest(self._dir)
867 867 copy._node = self._node
868 868 copy._dirty = self._dirty
869 869 if self._copyfunc is _noop:
870 870 def _copyfunc(s):
871 871 self._load()
872 872 for d in self._dirs:
873 873 s._dirs[d] = self._dirs[d].copy()
874 874 s._files = dict.copy(self._files)
875 875 s._flags = dict.copy(self._flags)
876 876 if self._loadfunc is _noop:
877 877 _copyfunc(copy)
878 878 else:
879 879 copy._copyfunc = _copyfunc
880 880 else:
881 881 copy._copyfunc = self._copyfunc
882 882 return copy
883 883
884 884 def filesnotin(self, m2, match=None):
885 885 '''Set of files in this manifest that are not in the other'''
886 886 if match:
887 887 m1 = self.matches(match)
888 888 m2 = m2.matches(match)
889 889 return m1.filesnotin(m2)
890 890
891 891 files = set()
892 892 def _filesnotin(t1, t2):
893 893 if t1._node == t2._node and not t1._dirty and not t2._dirty:
894 894 return
895 895 t1._load()
896 896 t2._load()
897 897 for d, m1 in t1._dirs.iteritems():
898 898 if d in t2._dirs:
899 899 m2 = t2._dirs[d]
900 900 _filesnotin(m1, m2)
901 901 else:
902 902 files.update(m1.iterkeys())
903 903
904 904 for fn in t1._files:
905 905 if fn not in t2._files:
906 906 files.add(t1._subpath(fn))
907 907
908 908 _filesnotin(self, m2)
909 909 return files
910 910
911 911 @propertycache
912 912 def _alldirs(self):
913 913 return util.dirs(self)
914 914
915 915 def dirs(self):
916 916 return self._alldirs
917 917
918 918 def hasdir(self, dir):
919 919 self._load()
920 920 topdir, subdir = _splittopdir(dir)
921 921 if topdir:
922 922 if topdir in self._dirs:
923 923 return self._dirs[topdir].hasdir(subdir)
924 924 return False
925 925 return (dir + '/') in self._dirs
926 926
927 927 def walk(self, match):
928 928 '''Generates matching file names.
929 929
930 930 Equivalent to manifest.matches(match).iterkeys(), but without creating
931 931 an entirely new manifest.
932 932
933 933 It also reports nonexistent files by marking them bad with match.bad().
934 934 '''
935 935 if match.always():
936 936 for f in iter(self):
937 937 yield f
938 938 return
939 939
940 940 fset = set(match.files())
941 941
942 942 for fn in self._walk(match):
943 943 if fn in fset:
944 944 # specified pattern is the exact name
945 945 fset.remove(fn)
946 946 yield fn
947 947
948 948 # for dirstate.walk, files=['.'] means "walk the whole tree".
949 949 # follow that here, too
950 950 fset.discard('.')
951 951
952 952 for fn in sorted(fset):
953 953 if not self.hasdir(fn):
954 954 match.bad(fn, None)
955 955
956 956 def _walk(self, match):
957 957 '''Recursively generates matching file names for walk().'''
958 958 if not match.visitdir(self._dir[:-1] or '.'):
959 959 return
960 960
961 961 # yield this dir's files and walk its submanifests
962 962 self._load()
963 963 for p in sorted(list(self._dirs) + list(self._files)):
964 964 if p in self._files:
965 965 fullp = self._subpath(p)
966 966 if match(fullp):
967 967 yield fullp
968 968 else:
969 969 for f in self._dirs[p]._walk(match):
970 970 yield f
971 971
972 972 def matches(self, match):
973 973 '''generate a new manifest filtered by the match argument'''
974 974 if match.always():
975 975 return self.copy()
976 976
977 977 return self._matches(match)
978 978
979 979 def _matches(self, match):
980 980 '''recursively generate a new manifest filtered by the match argument.
981 981 '''
982 982
983 983 visit = match.visitdir(self._dir[:-1] or '.')
984 984 if visit == 'all':
985 985 return self.copy()
986 986 ret = treemanifest(self._dir)
987 987 if not visit:
988 988 return ret
989 989
990 990 self._load()
991 991 for fn in self._files:
992 992 fullp = self._subpath(fn)
993 993 if not match(fullp):
994 994 continue
995 995 ret._files[fn] = self._files[fn]
996 996 if fn in self._flags:
997 997 ret._flags[fn] = self._flags[fn]
998 998
999 999 for dir, subm in self._dirs.iteritems():
1000 1000 m = subm._matches(match)
1001 1001 if not m._isempty():
1002 1002 ret._dirs[dir] = m
1003 1003
1004 1004 if not ret._isempty():
1005 1005 ret._dirty = True
1006 1006 return ret
1007 1007
1008 1008 def diff(self, m2, match=None, clean=False):
1009 1009 '''Finds changes between the current manifest and m2.
1010 1010
1011 1011 Args:
1012 1012 m2: the manifest to which this manifest should be compared.
1013 1013 clean: if true, include files unchanged between these manifests
1014 1014 with a None value in the returned dictionary.
1015 1015
1016 1016 The result is returned as a dict with filename as key and
1017 1017 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1018 1018 nodeid in the current/other manifest and fl1/fl2 is the flag
1019 1019 in the current/other manifest. Where the file does not exist,
1020 1020 the nodeid will be None and the flags will be the empty
1021 1021 string.
1022 1022 '''
1023 1023 if match:
1024 1024 m1 = self.matches(match)
1025 1025 m2 = m2.matches(match)
1026 1026 return m1.diff(m2, clean=clean)
1027 1027 result = {}
1028 1028 emptytree = treemanifest()
1029 1029 def _diff(t1, t2):
1030 1030 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1031 1031 return
1032 1032 t1._load()
1033 1033 t2._load()
1034 1034 for d, m1 in t1._dirs.iteritems():
1035 1035 m2 = t2._dirs.get(d, emptytree)
1036 1036 _diff(m1, m2)
1037 1037
1038 1038 for d, m2 in t2._dirs.iteritems():
1039 1039 if d not in t1._dirs:
1040 1040 _diff(emptytree, m2)
1041 1041
1042 1042 for fn, n1 in t1._files.iteritems():
1043 1043 fl1 = t1._flags.get(fn, '')
1044 1044 n2 = t2._files.get(fn, None)
1045 1045 fl2 = t2._flags.get(fn, '')
1046 1046 if n1 != n2 or fl1 != fl2:
1047 1047 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1048 1048 elif clean:
1049 1049 result[t1._subpath(fn)] = None
1050 1050
1051 1051 for fn, n2 in t2._files.iteritems():
1052 1052 if fn not in t1._files:
1053 1053 fl2 = t2._flags.get(fn, '')
1054 1054 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
1055 1055
1056 1056 _diff(self, m2)
1057 1057 return result
1058 1058
1059 1059 def unmodifiedsince(self, m2):
1060 1060 return not self._dirty and not m2._dirty and self._node == m2._node
1061 1061
1062 1062 def parse(self, text, readsubtree):
1063 1063 for f, n, fl in _parse(text):
1064 1064 if fl == 't':
1065 1065 f = f + '/'
1066 1066 self._dirs[f] = readsubtree(self._subpath(f), n)
1067 1067 elif '/' in f:
1068 1068 # This is a flat manifest, so use __setitem__ and setflag rather
1069 1069 # than assigning directly to _files and _flags, so we can
1070 1070 # assign a path in a subdirectory, and to mark dirty (compared
1071 1071 # to nullid).
1072 1072 self[f] = n
1073 1073 if fl:
1074 1074 self.setflag(f, fl)
1075 1075 else:
1076 1076 # Assigning to _files and _flags avoids marking as dirty,
1077 1077 # and should be a little faster.
1078 1078 self._files[f] = n
1079 1079 if fl:
1080 1080 self._flags[f] = fl
1081 1081
1082 1082 def text(self):
1083 1083 """Get the full data of this manifest as a bytestring."""
1084 1084 self._load()
1085 1085 return _text(self.iterentries())
1086 1086
1087 1087 def dirtext(self):
1088 1088 """Get the full data of this directory as a bytestring. Make sure that
1089 1089 any submanifests have been written first, so their nodeids are correct.
1090 1090 """
1091 1091 self._load()
1092 1092 flags = self.flags
1093 1093 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1094 1094 files = [(f, self._files[f], flags(f)) for f in self._files]
1095 1095 return _text(sorted(dirs + files))
1096 1096
1097 1097 def read(self, gettext, readsubtree):
1098 1098 def _load_for_read(s):
1099 1099 s.parse(gettext(), readsubtree)
1100 1100 s._dirty = False
1101 1101 self._loadfunc = _load_for_read
1102 1102
1103 1103 def writesubtrees(self, m1, m2, writesubtree):
1104 1104 self._load() # for consistency; should never have any effect here
1105 1105 m1._load()
1106 1106 m2._load()
1107 1107 emptytree = treemanifest()
1108 1108 for d, subm in self._dirs.iteritems():
1109 1109 subp1 = m1._dirs.get(d, emptytree)._node
1110 1110 subp2 = m2._dirs.get(d, emptytree)._node
1111 1111 if subp1 == revlog.nullid:
1112 1112 subp1, subp2 = subp2, subp1
1113 1113 writesubtree(subm, subp1, subp2)
1114 1114
1115 1115 def walksubtrees(self, matcher=None):
1116 1116 """Returns an iterator of the subtrees of this manifest, including this
1117 1117 manifest itself.
1118 1118
1119 1119 If `matcher` is provided, it only returns subtrees that match.
1120 1120 """
1121 1121 if matcher and not matcher.visitdir(self._dir[:-1] or '.'):
1122 1122 return
1123 1123 if not matcher or matcher(self._dir[:-1]):
1124 1124 yield self
1125 1125
1126 1126 self._load()
1127 1127 for d, subm in self._dirs.iteritems():
1128 1128 for subtree in subm.walksubtrees(matcher=matcher):
1129 1129 yield subtree
1130 1130
1131 1131 class manifestrevlog(revlog.revlog):
1132 1132 '''A revlog that stores manifest texts. This is responsible for caching the
1133 1133 full-text manifest contents.
1134 1134 '''
1135 1135 def __init__(self, opener, dir='', dirlogcache=None, indexfile=None,
1136 1136 treemanifest=False):
1137 1137 """Constructs a new manifest revlog
1138 1138
1139 1139 `indexfile` - used by extensions to have two manifests at once, like
1140 1140 when transitioning between flatmanifeset and treemanifests.
1141 1141
1142 1142 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1143 1143 options can also be used to make this a tree manifest revlog. The opener
1144 1144 option takes precedence, so if it is set to True, we ignore whatever
1145 1145 value is passed in to the constructor.
1146 1146 """
1147 1147 # During normal operations, we expect to deal with not more than four
1148 1148 # revs at a time (such as during commit --amend). When rebasing large
1149 1149 # stacks of commits, the number can go up, hence the config knob below.
1150 1150 cachesize = 4
1151 1151 optiontreemanifest = False
1152 1152 opts = getattr(opener, 'options', None)
1153 1153 if opts is not None:
1154 1154 cachesize = opts.get('manifestcachesize', cachesize)
1155 1155 optiontreemanifest = opts.get('treemanifest', False)
1156 1156
1157 1157 self._treeondisk = optiontreemanifest or treemanifest
1158 1158
1159 1159 self._fulltextcache = util.lrucachedict(cachesize)
1160 1160
1161 1161 if dir:
1162 1162 assert self._treeondisk, 'opts is %r' % opts
1163 1163
1164 1164 if indexfile is None:
1165 1165 indexfile = '00manifest.i'
1166 1166 if dir:
1167 1167 indexfile = "meta/" + dir + indexfile
1168 1168
1169 1169 self._dir = dir
1170 1170 # The dirlogcache is kept on the root manifest log
1171 1171 if dir:
1172 1172 self._dirlogcache = dirlogcache
1173 1173 else:
1174 1174 self._dirlogcache = {'': self}
1175 1175
1176 1176 super(manifestrevlog, self).__init__(opener, indexfile,
1177 1177 # only root indexfile is cached
1178 1178 checkambig=not bool(dir),
1179 1179 mmaplargeindex=True)
1180 1180
1181 1181 @property
1182 1182 def fulltextcache(self):
1183 1183 return self._fulltextcache
1184 1184
1185 1185 def clearcaches(self):
1186 1186 super(manifestrevlog, self).clearcaches()
1187 1187 self._fulltextcache.clear()
1188 1188 self._dirlogcache = {'': self}
1189 1189
1190 1190 def dirlog(self, d):
1191 1191 if d:
1192 1192 assert self._treeondisk
1193 1193 if d not in self._dirlogcache:
1194 1194 mfrevlog = manifestrevlog(self.opener, d,
1195 1195 self._dirlogcache,
1196 1196 treemanifest=self._treeondisk)
1197 1197 self._dirlogcache[d] = mfrevlog
1198 1198 return self._dirlogcache[d]
1199 1199
1200 1200 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
1201 1201 if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
1202 1202 # If our first parent is in the manifest cache, we can
1203 1203 # compute a delta here using properties we know about the
1204 1204 # manifest up-front, which may save time later for the
1205 1205 # revlog layer.
1206 1206
1207 1207 _checkforbidden(added)
1208 1208 # combine the changed lists into one sorted iterator
1209 1209 work = heapq.merge([(x, False) for x in added],
1210 1210 [(x, True) for x in removed])
1211 1211
1212 1212 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1213 1213 cachedelta = self.rev(p1), deltatext
1214 1214 text = util.buffer(arraytext)
1215 1215 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
1216 1216 else:
1217 1217 # The first parent manifest isn't already loaded, so we'll
1218 1218 # just encode a fulltext of the manifest and pass that
1219 1219 # through to the revlog layer, and let it handle the delta
1220 1220 # process.
1221 1221 if self._treeondisk:
1222 1222 assert readtree, "readtree must be set for treemanifest writes"
1223 1223 m1 = readtree(self._dir, p1)
1224 1224 m2 = readtree(self._dir, p2)
1225 1225 n = self._addtree(m, transaction, link, m1, m2, readtree)
1226 1226 arraytext = None
1227 1227 else:
1228 1228 text = m.text()
1229 1229 n = self.addrevision(text, transaction, link, p1, p2)
1230 1230 arraytext = bytearray(text)
1231 1231
1232 1232 if arraytext is not None:
1233 1233 self.fulltextcache[n] = arraytext
1234 1234
1235 1235 return n
1236 1236
1237 1237 def _addtree(self, m, transaction, link, m1, m2, readtree):
1238 1238 # If the manifest is unchanged compared to one parent,
1239 1239 # don't write a new revision
1240 1240 if self._dir != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(m2)):
1241 1241 return m.node()
1242 1242 def writesubtree(subm, subp1, subp2):
1243 1243 sublog = self.dirlog(subm.dir())
1244 1244 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1245 1245 readtree=readtree)
1246 1246 m.writesubtrees(m1, m2, writesubtree)
1247 1247 text = m.dirtext()
1248 1248 n = None
1249 1249 if self._dir != '':
1250 1250 # Double-check whether contents are unchanged to one parent
1251 1251 if text == m1.dirtext():
1252 1252 n = m1.node()
1253 1253 elif text == m2.dirtext():
1254 1254 n = m2.node()
1255 1255
1256 1256 if not n:
1257 1257 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1258 1258
1259 1259 # Save nodeid so parent manifest can calculate its nodeid
1260 1260 m.setnode(n)
1261 1261 return n
1262 1262
1263 1263 class manifestlog(object):
1264 1264 """A collection class representing the collection of manifest snapshots
1265 1265 referenced by commits in the repository.
1266 1266
1267 1267 In this situation, 'manifest' refers to the abstract concept of a snapshot
1268 1268 of the list of files in the given commit. Consumers of the output of this
1269 1269 class do not care about the implementation details of the actual manifests
1270 1270 they receive (i.e. tree or flat or lazily loaded, etc)."""
1271 1271 def __init__(self, opener, repo):
1272 1272 usetreemanifest = False
1273 1273 cachesize = 4
1274 1274
1275 1275 opts = getattr(opener, 'options', None)
1276 1276 if opts is not None:
1277 1277 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1278 1278 cachesize = opts.get('manifestcachesize', cachesize)
1279 1279 self._treeinmem = usetreemanifest
1280 1280
1281 1281 self._revlog = repo._constructmanifest()
1282 1282 self._narrowmatch = repo.narrowmatch()
1283 1283
1284 1284 # A cache of the manifestctx or treemanifestctx for each directory
1285 1285 self._dirmancache = {}
1286 1286 self._dirmancache[''] = util.lrucachedict(cachesize)
1287 1287
1288 1288 self.cachesize = cachesize
1289 1289
1290 1290 def __getitem__(self, node):
1291 1291 """Retrieves the manifest instance for the given node. Throws a
1292 1292 LookupError if not found.
1293 1293 """
1294 1294 return self.get('', node)
1295 1295
1296 1296 def get(self, dir, node, verify=True):
1297 1297 """Retrieves the manifest instance for the given node. Throws a
1298 1298 LookupError if not found.
1299 1299
1300 1300 `verify` - if True an exception will be thrown if the node is not in
1301 1301 the revlog
1302 1302 """
1303 1303 if node in self._dirmancache.get(dir, ()):
1304 1304 return self._dirmancache[dir][node]
1305 1305
1306 if not self._narrowmatch.always():
1307 if not self._narrowmatch.visitdir(dir[:-1] or '.'):
1308 return excludeddirmanifestctx(dir, node)
1306 1309 if dir:
1307 1310 if self._revlog._treeondisk:
1308 1311 if verify:
1309 1312 dirlog = self._revlog.dirlog(dir)
1310 1313 if node not in dirlog.nodemap:
1311 1314 raise LookupError(node, dirlog.indexfile,
1312 1315 _('no node'))
1313 1316 m = treemanifestctx(self, dir, node)
1314 1317 else:
1315 1318 raise error.Abort(
1316 1319 _("cannot ask for manifest directory '%s' in a flat "
1317 1320 "manifest") % dir)
1318 1321 else:
1319 1322 if verify:
1320 1323 if node not in self._revlog.nodemap:
1321 1324 raise LookupError(node, self._revlog.indexfile,
1322 1325 _('no node'))
1323 1326 if self._treeinmem:
1324 1327 m = treemanifestctx(self, '', node)
1325 1328 else:
1326 1329 m = manifestctx(self, node)
1327 1330
1328 1331 if node != revlog.nullid:
1329 1332 mancache = self._dirmancache.get(dir)
1330 1333 if not mancache:
1331 1334 mancache = util.lrucachedict(self.cachesize)
1332 1335 self._dirmancache[dir] = mancache
1333 1336 mancache[node] = m
1334 1337 return m
1335 1338
1336 1339 def clearcaches(self):
1337 1340 self._dirmancache.clear()
1338 1341 self._revlog.clearcaches()
1339 1342
1340 1343 class memmanifestctx(object):
1341 1344 def __init__(self, manifestlog):
1342 1345 self._manifestlog = manifestlog
1343 1346 self._manifestdict = manifestdict()
1344 1347
1345 1348 def _revlog(self):
1346 1349 return self._manifestlog._revlog
1347 1350
1348 1351 def new(self):
1349 1352 return memmanifestctx(self._manifestlog)
1350 1353
1351 1354 def copy(self):
1352 1355 memmf = memmanifestctx(self._manifestlog)
1353 1356 memmf._manifestdict = self.read().copy()
1354 1357 return memmf
1355 1358
1356 1359 def read(self):
1357 1360 return self._manifestdict
1358 1361
1359 1362 def write(self, transaction, link, p1, p2, added, removed):
1360 1363 return self._revlog().add(self._manifestdict, transaction, link, p1, p2,
1361 1364 added, removed)
1362 1365
1363 1366 class manifestctx(object):
1364 1367 """A class representing a single revision of a manifest, including its
1365 1368 contents, its parent revs, and its linkrev.
1366 1369 """
1367 1370 def __init__(self, manifestlog, node):
1368 1371 self._manifestlog = manifestlog
1369 1372 self._data = None
1370 1373
1371 1374 self._node = node
1372 1375
1373 1376 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1374 1377 # but let's add it later when something needs it and we can load it
1375 1378 # lazily.
1376 1379 #self.p1, self.p2 = revlog.parents(node)
1377 1380 #rev = revlog.rev(node)
1378 1381 #self.linkrev = revlog.linkrev(rev)
1379 1382
1380 1383 def _revlog(self):
1381 1384 return self._manifestlog._revlog
1382 1385
1383 1386 def node(self):
1384 1387 return self._node
1385 1388
1386 1389 def new(self):
1387 1390 return memmanifestctx(self._manifestlog)
1388 1391
1389 1392 def copy(self):
1390 1393 memmf = memmanifestctx(self._manifestlog)
1391 1394 memmf._manifestdict = self.read().copy()
1392 1395 return memmf
1393 1396
1394 1397 @propertycache
1395 1398 def parents(self):
1396 1399 return self._revlog().parents(self._node)
1397 1400
1398 1401 def read(self):
1399 1402 if self._data is None:
1400 1403 if self._node == revlog.nullid:
1401 1404 self._data = manifestdict()
1402 1405 else:
1403 1406 rl = self._revlog()
1404 1407 text = rl.revision(self._node)
1405 1408 arraytext = bytearray(text)
1406 1409 rl._fulltextcache[self._node] = arraytext
1407 1410 self._data = manifestdict(text)
1408 1411 return self._data
1409 1412
1410 1413 def readfast(self, shallow=False):
1411 1414 '''Calls either readdelta or read, based on which would be less work.
1412 1415 readdelta is called if the delta is against the p1, and therefore can be
1413 1416 read quickly.
1414 1417
1415 1418 If `shallow` is True, nothing changes since this is a flat manifest.
1416 1419 '''
1417 1420 rl = self._revlog()
1418 1421 r = rl.rev(self._node)
1419 1422 deltaparent = rl.deltaparent(r)
1420 1423 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1421 1424 return self.readdelta()
1422 1425 return self.read()
1423 1426
1424 1427 def readdelta(self, shallow=False):
1425 1428 '''Returns a manifest containing just the entries that are present
1426 1429 in this manifest, but not in its p1 manifest. This is efficient to read
1427 1430 if the revlog delta is already p1.
1428 1431
1429 1432 Changing the value of `shallow` has no effect on flat manifests.
1430 1433 '''
1431 1434 revlog = self._revlog()
1432 1435 r = revlog.rev(self._node)
1433 1436 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1434 1437 return manifestdict(d)
1435 1438
1436 1439 def find(self, key):
1437 1440 return self.read().find(key)
1438 1441
1439 1442 class memtreemanifestctx(object):
1440 1443 def __init__(self, manifestlog, dir=''):
1441 1444 self._manifestlog = manifestlog
1442 1445 self._dir = dir
1443 1446 self._treemanifest = treemanifest()
1444 1447
1445 1448 def _revlog(self):
1446 1449 return self._manifestlog._revlog
1447 1450
1448 1451 def new(self, dir=''):
1449 1452 return memtreemanifestctx(self._manifestlog, dir=dir)
1450 1453
1451 1454 def copy(self):
1452 1455 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1453 1456 memmf._treemanifest = self._treemanifest.copy()
1454 1457 return memmf
1455 1458
1456 1459 def read(self):
1457 1460 return self._treemanifest
1458 1461
1459 1462 def write(self, transaction, link, p1, p2, added, removed):
1460 1463 def readtree(dir, node):
1461 1464 return self._manifestlog.get(dir, node).read()
1462 1465 return self._revlog().add(self._treemanifest, transaction, link, p1, p2,
1463 1466 added, removed, readtree=readtree)
1464 1467
1465 1468 class treemanifestctx(object):
1466 1469 def __init__(self, manifestlog, dir, node):
1467 1470 self._manifestlog = manifestlog
1468 1471 self._dir = dir
1469 1472 self._data = None
1470 1473
1471 1474 self._node = node
1472 1475
1473 1476 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1474 1477 # we can instantiate treemanifestctx objects for directories we don't
1475 1478 # have on disk.
1476 1479 #self.p1, self.p2 = revlog.parents(node)
1477 1480 #rev = revlog.rev(node)
1478 1481 #self.linkrev = revlog.linkrev(rev)
1479 1482
1480 1483 def _revlog(self):
1481 1484 narrowmatch = self._manifestlog._narrowmatch
1482 1485 if not narrowmatch.always():
1483 1486 if not narrowmatch.visitdir(self._dir[:-1] or '.'):
1484 1487 return excludedmanifestrevlog(self._dir)
1485 1488 return self._manifestlog._revlog.dirlog(self._dir)
1486 1489
1487 1490 def read(self):
1488 1491 if self._data is None:
1489 1492 rl = self._revlog()
1490 1493 if self._node == revlog.nullid:
1491 1494 self._data = treemanifest()
1492 1495 elif rl._treeondisk:
1493 1496 m = treemanifest(dir=self._dir)
1494 1497 def gettext():
1495 1498 return rl.revision(self._node)
1496 1499 def readsubtree(dir, subm):
1497 1500 # Set verify to False since we need to be able to create
1498 1501 # subtrees for trees that don't exist on disk.
1499 1502 return self._manifestlog.get(dir, subm, verify=False).read()
1500 1503 m.read(gettext, readsubtree)
1501 1504 m.setnode(self._node)
1502 1505 self._data = m
1503 1506 else:
1504 1507 text = rl.revision(self._node)
1505 1508 arraytext = bytearray(text)
1506 1509 rl.fulltextcache[self._node] = arraytext
1507 1510 self._data = treemanifest(dir=self._dir, text=text)
1508 1511
1509 1512 return self._data
1510 1513
1511 1514 def node(self):
1512 1515 return self._node
1513 1516
1514 1517 def new(self, dir=''):
1515 1518 return memtreemanifestctx(self._manifestlog, dir=dir)
1516 1519
1517 1520 def copy(self):
1518 1521 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1519 1522 memmf._treemanifest = self.read().copy()
1520 1523 return memmf
1521 1524
1522 1525 @propertycache
1523 1526 def parents(self):
1524 1527 return self._revlog().parents(self._node)
1525 1528
1526 1529 def readdelta(self, shallow=False):
1527 1530 '''Returns a manifest containing just the entries that are present
1528 1531 in this manifest, but not in its p1 manifest. This is efficient to read
1529 1532 if the revlog delta is already p1.
1530 1533
1531 1534 If `shallow` is True, this will read the delta for this directory,
1532 1535 without recursively reading subdirectory manifests. Instead, any
1533 1536 subdirectory entry will be reported as it appears in the manifest, i.e.
1534 1537 the subdirectory will be reported among files and distinguished only by
1535 1538 its 't' flag.
1536 1539 '''
1537 1540 revlog = self._revlog()
1538 1541 if shallow:
1539 1542 r = revlog.rev(self._node)
1540 1543 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1541 1544 return manifestdict(d)
1542 1545 else:
1543 1546 # Need to perform a slow delta
1544 1547 r0 = revlog.deltaparent(revlog.rev(self._node))
1545 1548 m0 = self._manifestlog.get(self._dir, revlog.node(r0)).read()
1546 1549 m1 = self.read()
1547 1550 md = treemanifest(dir=self._dir)
1548 1551 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1549 1552 if n1:
1550 1553 md[f] = n1
1551 1554 if fl1:
1552 1555 md.setflag(f, fl1)
1553 1556 return md
1554 1557
1555 1558 def readfast(self, shallow=False):
1556 1559 '''Calls either readdelta or read, based on which would be less work.
1557 1560 readdelta is called if the delta is against the p1, and therefore can be
1558 1561 read quickly.
1559 1562
1560 1563 If `shallow` is True, it only returns the entries from this manifest,
1561 1564 and not any submanifests.
1562 1565 '''
1563 1566 rl = self._revlog()
1564 1567 r = rl.rev(self._node)
1565 1568 deltaparent = rl.deltaparent(r)
1566 1569 if (deltaparent != revlog.nullrev and
1567 1570 deltaparent in rl.parentrevs(r)):
1568 1571 return self.readdelta(shallow=shallow)
1569 1572
1570 1573 if shallow:
1571 1574 return manifestdict(rl.revision(self._node))
1572 1575 else:
1573 1576 return self.read()
1574 1577
1575 1578 def find(self, key):
1576 1579 return self.read().find(key)
1577 1580
1578 1581 class excludeddir(treemanifest):
1579 1582 """Stand-in for a directory that is excluded from the repository.
1580 1583
1581 1584 With narrowing active on a repository that uses treemanifests,
1582 1585 some of the directory revlogs will be excluded from the resulting
1583 1586 clone. This is a huge storage win for clients, but means we need
1584 1587 some sort of pseudo-manifest to surface to internals so we can
1585 1588 detect a merge conflict outside the narrowspec. That's what this
1586 1589 class is: it stands in for a directory whose node is known, but
1587 1590 whose contents are unknown.
1588 1591 """
1589 1592 def __init__(self, dir, node):
1590 1593 super(excludeddir, self).__init__(dir)
1591 1594 self._node = node
1592 1595 # Add an empty file, which will be included by iterators and such,
1593 1596 # appearing as the directory itself (i.e. something like "dir/")
1594 1597 self._files[''] = node
1595 1598 self._flags[''] = 't'
1596 1599
1597 1600 # Manifests outside the narrowspec should never be modified, so avoid
1598 1601 # copying. This makes a noticeable difference when there are very many
1599 1602 # directories outside the narrowspec. Also, it makes sense for the copy to
1600 1603 # be of the same type as the original, which would not happen with the
1601 1604 # super type's copy().
1602 1605 def copy(self):
1603 1606 return self
1604 1607
1605 1608 class excludeddirmanifestctx(treemanifestctx):
1606 1609 """context wrapper for excludeddir - see that docstring for rationale"""
1607 1610 def __init__(self, dir, node):
1608 1611 self._dir = dir
1609 1612 self._node = node
1610 1613
1611 1614 def read(self):
1612 1615 return excludeddir(self._dir, self._node)
1613 1616
1614 1617 def write(self, *args):
1615 1618 raise error.ProgrammingError(
1616 1619 'attempt to write manifest from excluded dir %s' % self._dir)
1617 1620
1618 1621 class excludedmanifestrevlog(manifestrevlog):
1619 1622 """Stand-in for excluded treemanifest revlogs.
1620 1623
1621 1624 When narrowing is active on a treemanifest repository, we'll have
1622 1625 references to directories we can't see due to the revlog being
1623 1626 skipped. This class exists to conform to the manifestrevlog
1624 1627 interface for those directories and proactively prevent writes to
1625 1628 outside the narrowspec.
1626 1629 """
1627 1630
1628 1631 def __init__(self, dir):
1629 1632 self._dir = dir
1630 1633
1631 1634 def __len__(self):
1632 1635 raise error.ProgrammingError(
1633 1636 'attempt to get length of excluded dir %s' % self._dir)
1634 1637
1635 1638 def rev(self, node):
1636 1639 raise error.ProgrammingError(
1637 1640 'attempt to get rev from excluded dir %s' % self._dir)
1638 1641
1639 1642 def linkrev(self, node):
1640 1643 raise error.ProgrammingError(
1641 1644 'attempt to get linkrev from excluded dir %s' % self._dir)
1642 1645
1643 1646 def node(self, rev):
1644 1647 raise error.ProgrammingError(
1645 1648 'attempt to get node from excluded dir %s' % self._dir)
1646 1649
1647 1650 def add(self, *args, **kwargs):
1648 1651 # We should never write entries in dirlogs outside the narrow clone.
1649 1652 # However, the method still gets called from writesubtree() in
1650 1653 # _addtree(), so we need to handle it. We should possibly make that
1651 1654 # avoid calling add() with a clean manifest (_dirty is always False
1652 1655 # in excludeddir instances).
1653 1656 pass
General Comments 0
You need to be logged in to leave comments. Login now