##// END OF EJS Templates
narrow: move excludeddir and related classes to core...
Martin von Zweigbergk -
r37390:1b2fa531 default
parent child Browse files
Show More
@@ -1,185 +1,107 b''
1 1 # narrowrevlog.py - revlog storing irrelevant nodes as "ellipsis" nodes
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial import (
11 error,
12 11 manifest,
13 12 revlog,
14 13 util,
15 14 )
16 15
17 16 def readtransform(self, text):
18 17 return text, False
19 18
20 19 def writetransform(self, text):
21 20 return text, False
22 21
23 22 def rawtransform(self, text):
24 23 return False
25 24
26 25 revlog.addflagprocessor(revlog.REVIDX_ELLIPSIS,
27 26 (readtransform, writetransform, rawtransform))
28 27
29 28 def setup():
30 29 # We just wanted to add the flag processor, which is done at module
31 30 # load time.
32 31 pass
33 32
34 class excludeddir(manifest.treemanifest):
35 """Stand-in for a directory that is excluded from the repository.
36
37 With narrowing active on a repository that uses treemanifests,
38 some of the directory revlogs will be excluded from the resulting
39 clone. This is a huge storage win for clients, but means we need
40 some sort of pseudo-manifest to surface to internals so we can
41 detect a merge conflict outside the narrowspec. That's what this
42 class is: it stands in for a directory whose node is known, but
43 whose contents are unknown.
44 """
45 def __init__(self, dir, node):
46 super(excludeddir, self).__init__(dir)
47 self._node = node
48 # Add an empty file, which will be included by iterators and such,
49 # appearing as the directory itself (i.e. something like "dir/")
50 self._files[''] = node
51 self._flags[''] = 't'
52
53 # Manifests outside the narrowspec should never be modified, so avoid
54 # copying. This makes a noticeable difference when there are very many
55 # directories outside the narrowspec. Also, it makes sense for the copy to
56 # be of the same type as the original, which would not happen with the
57 # super type's copy().
58 def copy(self):
59 return self
60
61 class excludeddirmanifestctx(manifest.treemanifestctx):
62 """context wrapper for excludeddir - see that docstring for rationale"""
63 def __init__(self, dir, node):
64 self._dir = dir
65 self._node = node
66
67 def read(self):
68 return excludeddir(self._dir, self._node)
69
70 def write(self, *args):
71 raise error.ProgrammingError(
72 'attempt to write manifest from excluded dir %s' % self._dir)
73
74 class excludedmanifestrevlog(manifest.manifestrevlog):
75 """Stand-in for excluded treemanifest revlogs.
76
77 When narrowing is active on a treemanifest repository, we'll have
78 references to directories we can't see due to the revlog being
79 skipped. This class exists to conform to the manifestrevlog
80 interface for those directories and proactively prevent writes to
81 outside the narrowspec.
82 """
83
84 def __init__(self, dir):
85 self._dir = dir
86
87 def __len__(self):
88 raise error.ProgrammingError(
89 'attempt to get length of excluded dir %s' % self._dir)
90
91 def rev(self, node):
92 raise error.ProgrammingError(
93 'attempt to get rev from excluded dir %s' % self._dir)
94
95 def linkrev(self, node):
96 raise error.ProgrammingError(
97 'attempt to get linkrev from excluded dir %s' % self._dir)
98
99 def node(self, rev):
100 raise error.ProgrammingError(
101 'attempt to get node from excluded dir %s' % self._dir)
102
103 def add(self, *args, **kwargs):
104 # We should never write entries in dirlogs outside the narrow clone.
105 # However, the method still gets called from writesubtree() in
106 # _addtree(), so we need to handle it. We should possibly make that
107 # avoid calling add() with a clean manifest (_dirty is always False
108 # in excludeddir instances).
109 pass
110
111 33 def makenarrowmanifestrevlog(mfrevlog, repo):
112 34 if util.safehasattr(mfrevlog, '_narrowed'):
113 35 return
114 36
115 37 class narrowmanifestrevlog(mfrevlog.__class__):
116 38 # This function is called via debug{revlog,index,data}, but also during
117 39 # at least some push operations. This will be used to wrap/exclude the
118 40 # child directories when using treemanifests.
119 41 def dirlog(self, d):
120 42 if not repo.narrowmatch().visitdir(d[:-1] or '.'):
121 return excludedmanifestrevlog(d)
43 return manifest.excludedmanifestrevlog(d)
122 44 result = super(narrowmanifestrevlog, self).dirlog(d)
123 45 makenarrowmanifestrevlog(result, repo)
124 46 return result
125 47
126 48 mfrevlog.__class__ = narrowmanifestrevlog
127 49 mfrevlog._narrowed = True
128 50
129 51 def makenarrowmanifestlog(mfl, repo):
130 52 class narrowmanifestlog(mfl.__class__):
131 53 def get(self, dir, node, verify=True):
132 54 if not repo.narrowmatch().visitdir(dir[:-1] or '.'):
133 return excludeddirmanifestctx(dir, node)
55 return manifest.excludeddirmanifestctx(dir, node)
134 56 return super(narrowmanifestlog, self).get(dir, node, verify=verify)
135 57 mfl.__class__ = narrowmanifestlog
136 58
137 59 def makenarrowfilelog(fl, narrowmatch):
138 60 class narrowfilelog(fl.__class__):
139 61 def renamed(self, node):
140 62 # Renames that come from outside the narrowspec are
141 63 # problematic at least for git-diffs, because we lack the
142 64 # base text for the rename. This logic was introduced in
143 65 # 3cd72b1 of narrowhg (authored by martinvonz, reviewed by
144 66 # adgar), but that revision doesn't have any additional
145 67 # commentary on what problems we can encounter.
146 68 m = super(narrowfilelog, self).renamed(node)
147 69 if m and not narrowmatch(m[0]):
148 70 return None
149 71 return m
150 72
151 73 def size(self, rev):
152 74 # We take advantage of the fact that remotefilelog
153 75 # lacks a node() method to just skip the
154 76 # rename-checking logic when on remotefilelog. This
155 77 # might be incorrect on other non-revlog-based storage
156 78 # engines, but for now this seems to be fine.
157 79 #
158 80 # TODO: when remotefilelog is in core, improve this to
159 81 # explicitly look for remotefilelog instead of cheating
160 82 # with a hasattr check.
161 83 if util.safehasattr(self, 'node'):
162 84 node = self.node(rev)
163 85 # Because renamed() is overridden above to
164 86 # sometimes return None even if there is metadata
165 87 # in the revlog, size can be incorrect for
166 88 # copies/renames, so we need to make sure we call
167 89 # the super class's implementation of renamed()
168 90 # for the purpose of size calculation.
169 91 if super(narrowfilelog, self).renamed(node):
170 92 return len(self.read(node))
171 93 return super(narrowfilelog, self).size(rev)
172 94
173 95 def cmp(self, node, text):
174 96 different = super(narrowfilelog, self).cmp(node, text)
175 97 if different:
176 98 # Similar to size() above, if the file was copied from
177 99 # a file outside the narrowspec, the super class's
178 100 # would have returned True because we tricked it into
179 101 # thinking that the file was not renamed.
180 102 if super(narrowfilelog, self).renamed(node):
181 103 t2 = self.read(node)
182 104 return t2 != text
183 105 return different
184 106
185 107 fl.__class__ = narrowfilelog
@@ -1,1571 +1,1648 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 bin,
17 17 hex,
18 18 )
19 19 from . import (
20 20 error,
21 21 mdiff,
22 22 policy,
23 23 revlog,
24 24 util,
25 25 )
26 26
27 27 parsers = policy.importmod(r'parsers')
28 28 propertycache = util.propertycache
29 29
30 30 def _parse(data):
31 31 # This method does a little bit of excessive-looking
32 32 # precondition checking. This is so that the behavior of this
33 33 # class exactly matches its C counterpart to try and help
34 34 # prevent surprise breakage for anyone that develops against
35 35 # the pure version.
36 36 if data and data[-1:] != '\n':
37 37 raise ValueError('Manifest did not end in a newline.')
38 38 prev = None
39 39 for l in data.splitlines():
40 40 if prev is not None and prev > l:
41 41 raise ValueError('Manifest lines not in sorted order.')
42 42 prev = l
43 43 f, n = l.split('\0')
44 44 if len(n) > 40:
45 45 yield f, bin(n[:40]), n[40:]
46 46 else:
47 47 yield f, bin(n), ''
48 48
49 49 def _text(it):
50 50 files = []
51 51 lines = []
52 52 _hex = revlog.hex
53 53 for f, n, fl in it:
54 54 files.append(f)
55 55 # if this is changed to support newlines in filenames,
56 56 # be sure to check the templates/ dir again (especially *-raw.tmpl)
57 57 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
58 58
59 59 _checkforbidden(files)
60 60 return ''.join(lines)
61 61
62 62 class lazymanifestiter(object):
63 63 def __init__(self, lm):
64 64 self.pos = 0
65 65 self.lm = lm
66 66
67 67 def __iter__(self):
68 68 return self
69 69
70 70 def next(self):
71 71 try:
72 72 data, pos = self.lm._get(self.pos)
73 73 except IndexError:
74 74 raise StopIteration
75 75 if pos == -1:
76 76 self.pos += 1
77 77 return data[0]
78 78 self.pos += 1
79 79 zeropos = data.find('\x00', pos)
80 80 return data[pos:zeropos]
81 81
82 82 __next__ = next
83 83
84 84 class lazymanifestiterentries(object):
85 85 def __init__(self, lm):
86 86 self.lm = lm
87 87 self.pos = 0
88 88
89 89 def __iter__(self):
90 90 return self
91 91
92 92 def next(self):
93 93 try:
94 94 data, pos = self.lm._get(self.pos)
95 95 except IndexError:
96 96 raise StopIteration
97 97 if pos == -1:
98 98 self.pos += 1
99 99 return data
100 100 zeropos = data.find('\x00', pos)
101 101 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
102 102 zeropos + 1, 40)
103 103 flags = self.lm._getflags(data, self.pos, zeropos)
104 104 self.pos += 1
105 105 return (data[pos:zeropos], hashval, flags)
106 106
107 107 __next__ = next
108 108
109 109 def unhexlify(data, extra, pos, length):
110 110 s = bin(data[pos:pos + length])
111 111 if extra:
112 112 s += chr(extra & 0xff)
113 113 return s
114 114
115 115 def _cmp(a, b):
116 116 return (a > b) - (a < b)
117 117
118 118 class _lazymanifest(object):
119 119 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
120 120 if positions is None:
121 121 self.positions = self.findlines(data)
122 122 self.extrainfo = [0] * len(self.positions)
123 123 self.data = data
124 124 self.extradata = []
125 125 else:
126 126 self.positions = positions[:]
127 127 self.extrainfo = extrainfo[:]
128 128 self.extradata = extradata[:]
129 129 self.data = data
130 130
131 131 def findlines(self, data):
132 132 if not data:
133 133 return []
134 134 pos = data.find("\n")
135 135 if pos == -1 or data[-1:] != '\n':
136 136 raise ValueError("Manifest did not end in a newline.")
137 137 positions = [0]
138 138 prev = data[:data.find('\x00')]
139 139 while pos < len(data) - 1 and pos != -1:
140 140 positions.append(pos + 1)
141 141 nexts = data[pos + 1:data.find('\x00', pos + 1)]
142 142 if nexts < prev:
143 143 raise ValueError("Manifest lines not in sorted order.")
144 144 prev = nexts
145 145 pos = data.find("\n", pos + 1)
146 146 return positions
147 147
148 148 def _get(self, index):
149 149 # get the position encoded in pos:
150 150 # positive number is an index in 'data'
151 151 # negative number is in extrapieces
152 152 pos = self.positions[index]
153 153 if pos >= 0:
154 154 return self.data, pos
155 155 return self.extradata[-pos - 1], -1
156 156
157 157 def _getkey(self, pos):
158 158 if pos >= 0:
159 159 return self.data[pos:self.data.find('\x00', pos + 1)]
160 160 return self.extradata[-pos - 1][0]
161 161
162 162 def bsearch(self, key):
163 163 first = 0
164 164 last = len(self.positions) - 1
165 165
166 166 while first <= last:
167 167 midpoint = (first + last)//2
168 168 nextpos = self.positions[midpoint]
169 169 candidate = self._getkey(nextpos)
170 170 r = _cmp(key, candidate)
171 171 if r == 0:
172 172 return midpoint
173 173 else:
174 174 if r < 0:
175 175 last = midpoint - 1
176 176 else:
177 177 first = midpoint + 1
178 178 return -1
179 179
180 180 def bsearch2(self, key):
181 181 # same as the above, but will always return the position
182 182 # done for performance reasons
183 183 first = 0
184 184 last = len(self.positions) - 1
185 185
186 186 while first <= last:
187 187 midpoint = (first + last)//2
188 188 nextpos = self.positions[midpoint]
189 189 candidate = self._getkey(nextpos)
190 190 r = _cmp(key, candidate)
191 191 if r == 0:
192 192 return (midpoint, True)
193 193 else:
194 194 if r < 0:
195 195 last = midpoint - 1
196 196 else:
197 197 first = midpoint + 1
198 198 return (first, False)
199 199
200 200 def __contains__(self, key):
201 201 return self.bsearch(key) != -1
202 202
203 203 def _getflags(self, data, needle, pos):
204 204 start = pos + 41
205 205 end = data.find("\n", start)
206 206 if end == -1:
207 207 end = len(data) - 1
208 208 if start == end:
209 209 return ''
210 210 return self.data[start:end]
211 211
212 212 def __getitem__(self, key):
213 213 if not isinstance(key, bytes):
214 214 raise TypeError("getitem: manifest keys must be a bytes.")
215 215 needle = self.bsearch(key)
216 216 if needle == -1:
217 217 raise KeyError
218 218 data, pos = self._get(needle)
219 219 if pos == -1:
220 220 return (data[1], data[2])
221 221 zeropos = data.find('\x00', pos)
222 222 assert 0 <= needle <= len(self.positions)
223 223 assert len(self.extrainfo) == len(self.positions)
224 224 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
225 225 flags = self._getflags(data, needle, zeropos)
226 226 return (hashval, flags)
227 227
228 228 def __delitem__(self, key):
229 229 needle, found = self.bsearch2(key)
230 230 if not found:
231 231 raise KeyError
232 232 cur = self.positions[needle]
233 233 self.positions = self.positions[:needle] + self.positions[needle + 1:]
234 234 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
235 235 if cur >= 0:
236 236 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
237 237
238 238 def __setitem__(self, key, value):
239 239 if not isinstance(key, bytes):
240 240 raise TypeError("setitem: manifest keys must be a byte string.")
241 241 if not isinstance(value, tuple) or len(value) != 2:
242 242 raise TypeError("Manifest values must be a tuple of (node, flags).")
243 243 hashval = value[0]
244 244 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
245 245 raise TypeError("node must be a 20-byte byte string")
246 246 flags = value[1]
247 247 if len(hashval) == 22:
248 248 hashval = hashval[:-1]
249 249 if not isinstance(flags, bytes) or len(flags) > 1:
250 250 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
251 251 needle, found = self.bsearch2(key)
252 252 if found:
253 253 # put the item
254 254 pos = self.positions[needle]
255 255 if pos < 0:
256 256 self.extradata[-pos - 1] = (key, hashval, value[1])
257 257 else:
258 258 # just don't bother
259 259 self.extradata.append((key, hashval, value[1]))
260 260 self.positions[needle] = -len(self.extradata)
261 261 else:
262 262 # not found, put it in with extra positions
263 263 self.extradata.append((key, hashval, value[1]))
264 264 self.positions = (self.positions[:needle] + [-len(self.extradata)]
265 265 + self.positions[needle:])
266 266 self.extrainfo = (self.extrainfo[:needle] + [0] +
267 267 self.extrainfo[needle:])
268 268
269 269 def copy(self):
270 270 # XXX call _compact like in C?
271 271 return _lazymanifest(self.data, self.positions, self.extrainfo,
272 272 self.extradata)
273 273
274 274 def _compact(self):
275 275 # hopefully not called TOO often
276 276 if len(self.extradata) == 0:
277 277 return
278 278 l = []
279 279 last_cut = 0
280 280 i = 0
281 281 offset = 0
282 282 self.extrainfo = [0] * len(self.positions)
283 283 while i < len(self.positions):
284 284 if self.positions[i] >= 0:
285 285 cur = self.positions[i]
286 286 last_cut = cur
287 287 while True:
288 288 self.positions[i] = offset
289 289 i += 1
290 290 if i == len(self.positions) or self.positions[i] < 0:
291 291 break
292 292 offset += self.positions[i] - cur
293 293 cur = self.positions[i]
294 294 end_cut = self.data.find('\n', cur)
295 295 if end_cut != -1:
296 296 end_cut += 1
297 297 offset += end_cut - cur
298 298 l.append(self.data[last_cut:end_cut])
299 299 else:
300 300 while i < len(self.positions) and self.positions[i] < 0:
301 301 cur = self.positions[i]
302 302 t = self.extradata[-cur - 1]
303 303 l.append(self._pack(t))
304 304 self.positions[i] = offset
305 305 if len(t[1]) > 20:
306 306 self.extrainfo[i] = ord(t[1][21])
307 307 offset += len(l[-1])
308 308 i += 1
309 309 self.data = ''.join(l)
310 310 self.extradata = []
311 311
312 312 def _pack(self, d):
313 313 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
314 314
315 315 def text(self):
316 316 self._compact()
317 317 return self.data
318 318
319 319 def diff(self, m2, clean=False):
320 320 '''Finds changes between the current manifest and m2.'''
321 321 # XXX think whether efficiency matters here
322 322 diff = {}
323 323
324 324 for fn, e1, flags in self.iterentries():
325 325 if fn not in m2:
326 326 diff[fn] = (e1, flags), (None, '')
327 327 else:
328 328 e2 = m2[fn]
329 329 if (e1, flags) != e2:
330 330 diff[fn] = (e1, flags), e2
331 331 elif clean:
332 332 diff[fn] = None
333 333
334 334 for fn, e2, flags in m2.iterentries():
335 335 if fn not in self:
336 336 diff[fn] = (None, ''), (e2, flags)
337 337
338 338 return diff
339 339
340 340 def iterentries(self):
341 341 return lazymanifestiterentries(self)
342 342
343 343 def iterkeys(self):
344 344 return lazymanifestiter(self)
345 345
346 346 def __iter__(self):
347 347 return lazymanifestiter(self)
348 348
349 349 def __len__(self):
350 350 return len(self.positions)
351 351
352 352 def filtercopy(self, filterfn):
353 353 # XXX should be optimized
354 354 c = _lazymanifest('')
355 355 for f, n, fl in self.iterentries():
356 356 if filterfn(f):
357 357 c[f] = n, fl
358 358 return c
359 359
360 360 try:
361 361 _lazymanifest = parsers.lazymanifest
362 362 except AttributeError:
363 363 pass
364 364
365 365 class manifestdict(object):
366 366 def __init__(self, data=''):
367 367 self._lm = _lazymanifest(data)
368 368
369 369 def __getitem__(self, key):
370 370 return self._lm[key][0]
371 371
372 372 def find(self, key):
373 373 return self._lm[key]
374 374
375 375 def __len__(self):
376 376 return len(self._lm)
377 377
378 378 def __nonzero__(self):
379 379 # nonzero is covered by the __len__ function, but implementing it here
380 380 # makes it easier for extensions to override.
381 381 return len(self._lm) != 0
382 382
383 383 __bool__ = __nonzero__
384 384
385 385 def __setitem__(self, key, node):
386 386 self._lm[key] = node, self.flags(key, '')
387 387
388 388 def __contains__(self, key):
389 389 if key is None:
390 390 return False
391 391 return key in self._lm
392 392
393 393 def __delitem__(self, key):
394 394 del self._lm[key]
395 395
396 396 def __iter__(self):
397 397 return self._lm.__iter__()
398 398
399 399 def iterkeys(self):
400 400 return self._lm.iterkeys()
401 401
402 402 def keys(self):
403 403 return list(self.iterkeys())
404 404
405 405 def filesnotin(self, m2, match=None):
406 406 '''Set of files in this manifest that are not in the other'''
407 407 if match:
408 408 m1 = self.matches(match)
409 409 m2 = m2.matches(match)
410 410 return m1.filesnotin(m2)
411 411 diff = self.diff(m2)
412 412 files = set(filepath
413 413 for filepath, hashflags in diff.iteritems()
414 414 if hashflags[1][0] is None)
415 415 return files
416 416
417 417 @propertycache
418 418 def _dirs(self):
419 419 return util.dirs(self)
420 420
421 421 def dirs(self):
422 422 return self._dirs
423 423
424 424 def hasdir(self, dir):
425 425 return dir in self._dirs
426 426
427 427 def _filesfastpath(self, match):
428 428 '''Checks whether we can correctly and quickly iterate over matcher
429 429 files instead of over manifest files.'''
430 430 files = match.files()
431 431 return (len(files) < 100 and (match.isexact() or
432 432 (match.prefix() and all(fn in self for fn in files))))
433 433
434 434 def walk(self, match):
435 435 '''Generates matching file names.
436 436
437 437 Equivalent to manifest.matches(match).iterkeys(), but without creating
438 438 an entirely new manifest.
439 439
440 440 It also reports nonexistent files by marking them bad with match.bad().
441 441 '''
442 442 if match.always():
443 443 for f in iter(self):
444 444 yield f
445 445 return
446 446
447 447 fset = set(match.files())
448 448
449 449 # avoid the entire walk if we're only looking for specific files
450 450 if self._filesfastpath(match):
451 451 for fn in sorted(fset):
452 452 yield fn
453 453 return
454 454
455 455 for fn in self:
456 456 if fn in fset:
457 457 # specified pattern is the exact name
458 458 fset.remove(fn)
459 459 if match(fn):
460 460 yield fn
461 461
462 462 # for dirstate.walk, files=['.'] means "walk the whole tree".
463 463 # follow that here, too
464 464 fset.discard('.')
465 465
466 466 for fn in sorted(fset):
467 467 if not self.hasdir(fn):
468 468 match.bad(fn, None)
469 469
470 470 def matches(self, match):
471 471 '''generate a new manifest filtered by the match argument'''
472 472 if match.always():
473 473 return self.copy()
474 474
475 475 if self._filesfastpath(match):
476 476 m = manifestdict()
477 477 lm = self._lm
478 478 for fn in match.files():
479 479 if fn in lm:
480 480 m._lm[fn] = lm[fn]
481 481 return m
482 482
483 483 m = manifestdict()
484 484 m._lm = self._lm.filtercopy(match)
485 485 return m
486 486
487 487 def diff(self, m2, match=None, clean=False):
488 488 '''Finds changes between the current manifest and m2.
489 489
490 490 Args:
491 491 m2: the manifest to which this manifest should be compared.
492 492 clean: if true, include files unchanged between these manifests
493 493 with a None value in the returned dictionary.
494 494
495 495 The result is returned as a dict with filename as key and
496 496 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
497 497 nodeid in the current/other manifest and fl1/fl2 is the flag
498 498 in the current/other manifest. Where the file does not exist,
499 499 the nodeid will be None and the flags will be the empty
500 500 string.
501 501 '''
502 502 if match:
503 503 m1 = self.matches(match)
504 504 m2 = m2.matches(match)
505 505 return m1.diff(m2, clean=clean)
506 506 return self._lm.diff(m2._lm, clean)
507 507
508 508 def setflag(self, key, flag):
509 509 self._lm[key] = self[key], flag
510 510
511 511 def get(self, key, default=None):
512 512 try:
513 513 return self._lm[key][0]
514 514 except KeyError:
515 515 return default
516 516
517 517 def flags(self, key, default=''):
518 518 try:
519 519 return self._lm[key][1]
520 520 except KeyError:
521 521 return default
522 522
523 523 def copy(self):
524 524 c = manifestdict()
525 525 c._lm = self._lm.copy()
526 526 return c
527 527
528 528 def items(self):
529 529 return (x[:2] for x in self._lm.iterentries())
530 530
531 531 iteritems = items
532 532
533 533 def iterentries(self):
534 534 return self._lm.iterentries()
535 535
536 536 def text(self):
537 537 # most likely uses native version
538 538 return self._lm.text()
539 539
540 540 def fastdelta(self, base, changes):
541 541 """Given a base manifest text as a bytearray and a list of changes
542 542 relative to that text, compute a delta that can be used by revlog.
543 543 """
544 544 delta = []
545 545 dstart = None
546 546 dend = None
547 547 dline = [""]
548 548 start = 0
549 549 # zero copy representation of base as a buffer
550 550 addbuf = util.buffer(base)
551 551
552 552 changes = list(changes)
553 553 if len(changes) < 1000:
554 554 # start with a readonly loop that finds the offset of
555 555 # each line and creates the deltas
556 556 for f, todelete in changes:
557 557 # bs will either be the index of the item or the insert point
558 558 start, end = _msearch(addbuf, f, start)
559 559 if not todelete:
560 560 h, fl = self._lm[f]
561 561 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
562 562 else:
563 563 if start == end:
564 564 # item we want to delete was not found, error out
565 565 raise AssertionError(
566 566 _("failed to remove %s from manifest") % f)
567 567 l = ""
568 568 if dstart is not None and dstart <= start and dend >= start:
569 569 if dend < end:
570 570 dend = end
571 571 if l:
572 572 dline.append(l)
573 573 else:
574 574 if dstart is not None:
575 575 delta.append([dstart, dend, "".join(dline)])
576 576 dstart = start
577 577 dend = end
578 578 dline = [l]
579 579
580 580 if dstart is not None:
581 581 delta.append([dstart, dend, "".join(dline)])
582 582 # apply the delta to the base, and get a delta for addrevision
583 583 deltatext, arraytext = _addlistdelta(base, delta)
584 584 else:
585 585 # For large changes, it's much cheaper to just build the text and
586 586 # diff it.
587 587 arraytext = bytearray(self.text())
588 588 deltatext = mdiff.textdiff(
589 589 util.buffer(base), util.buffer(arraytext))
590 590
591 591 return arraytext, deltatext
592 592
593 593 def _msearch(m, s, lo=0, hi=None):
594 594 '''return a tuple (start, end) that says where to find s within m.
595 595
596 596 If the string is found m[start:end] are the line containing
597 597 that string. If start == end the string was not found and
598 598 they indicate the proper sorted insertion point.
599 599
600 600 m should be a buffer, a memoryview or a byte string.
601 601 s is a byte string'''
602 602 def advance(i, c):
603 603 while i < lenm and m[i:i + 1] != c:
604 604 i += 1
605 605 return i
606 606 if not s:
607 607 return (lo, lo)
608 608 lenm = len(m)
609 609 if not hi:
610 610 hi = lenm
611 611 while lo < hi:
612 612 mid = (lo + hi) // 2
613 613 start = mid
614 614 while start > 0 and m[start - 1:start] != '\n':
615 615 start -= 1
616 616 end = advance(start, '\0')
617 617 if bytes(m[start:end]) < s:
618 618 # we know that after the null there are 40 bytes of sha1
619 619 # this translates to the bisect lo = mid + 1
620 620 lo = advance(end + 40, '\n') + 1
621 621 else:
622 622 # this translates to the bisect hi = mid
623 623 hi = start
624 624 end = advance(lo, '\0')
625 625 found = m[lo:end]
626 626 if s == found:
627 627 # we know that after the null there are 40 bytes of sha1
628 628 end = advance(end + 40, '\n')
629 629 return (lo, end + 1)
630 630 else:
631 631 return (lo, lo)
632 632
633 633 def _checkforbidden(l):
634 634 """Check filenames for illegal characters."""
635 635 for f in l:
636 636 if '\n' in f or '\r' in f:
637 637 raise error.RevlogError(
638 638 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
639 639
640 640
641 641 # apply the changes collected during the bisect loop to our addlist
642 642 # return a delta suitable for addrevision
643 643 def _addlistdelta(addlist, x):
644 644 # for large addlist arrays, building a new array is cheaper
645 645 # than repeatedly modifying the existing one
646 646 currentposition = 0
647 647 newaddlist = bytearray()
648 648
649 649 for start, end, content in x:
650 650 newaddlist += addlist[currentposition:start]
651 651 if content:
652 652 newaddlist += bytearray(content)
653 653
654 654 currentposition = end
655 655
656 656 newaddlist += addlist[currentposition:]
657 657
658 658 deltatext = "".join(struct.pack(">lll", start, end, len(content))
659 659 + content for start, end, content in x)
660 660 return deltatext, newaddlist
661 661
662 662 def _splittopdir(f):
663 663 if '/' in f:
664 664 dir, subpath = f.split('/', 1)
665 665 return dir + '/', subpath
666 666 else:
667 667 return '', f
668 668
669 669 _noop = lambda s: None
670 670
671 671 class treemanifest(object):
672 672 def __init__(self, dir='', text=''):
673 673 self._dir = dir
674 674 self._node = revlog.nullid
675 675 self._loadfunc = _noop
676 676 self._copyfunc = _noop
677 677 self._dirty = False
678 678 self._dirs = {}
679 679 # Using _lazymanifest here is a little slower than plain old dicts
680 680 self._files = {}
681 681 self._flags = {}
682 682 if text:
683 683 def readsubtree(subdir, subm):
684 684 raise AssertionError('treemanifest constructor only accepts '
685 685 'flat manifests')
686 686 self.parse(text, readsubtree)
687 687 self._dirty = True # Mark flat manifest dirty after parsing
688 688
689 689 def _subpath(self, path):
690 690 return self._dir + path
691 691
692 692 def __len__(self):
693 693 self._load()
694 694 size = len(self._files)
695 695 for m in self._dirs.values():
696 696 size += m.__len__()
697 697 return size
698 698
699 699 def __nonzero__(self):
700 700 # Faster than "__len() != 0" since it avoids loading sub-manifests
701 701 return not self._isempty()
702 702
703 703 __bool__ = __nonzero__
704 704
705 705 def _isempty(self):
706 706 self._load() # for consistency; already loaded by all callers
707 707 return (not self._files and (not self._dirs or
708 708 all(m._isempty() for m in self._dirs.values())))
709 709
710 710 def __repr__(self):
711 711 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
712 712 (self._dir, revlog.hex(self._node),
713 713 bool(self._loadfunc is _noop),
714 714 self._dirty, id(self)))
715 715
716 716 def dir(self):
717 717 '''The directory that this tree manifest represents, including a
718 718 trailing '/'. Empty string for the repo root directory.'''
719 719 return self._dir
720 720
721 721 def node(self):
722 722 '''This node of this instance. nullid for unsaved instances. Should
723 723 be updated when the instance is read or written from a revlog.
724 724 '''
725 725 assert not self._dirty
726 726 return self._node
727 727
728 728 def setnode(self, node):
729 729 self._node = node
730 730 self._dirty = False
731 731
732 732 def iterentries(self):
733 733 self._load()
734 734 for p, n in sorted(itertools.chain(self._dirs.items(),
735 735 self._files.items())):
736 736 if p in self._files:
737 737 yield self._subpath(p), n, self._flags.get(p, '')
738 738 else:
739 739 for x in n.iterentries():
740 740 yield x
741 741
742 742 def items(self):
743 743 self._load()
744 744 for p, n in sorted(itertools.chain(self._dirs.items(),
745 745 self._files.items())):
746 746 if p in self._files:
747 747 yield self._subpath(p), n
748 748 else:
749 749 for f, sn in n.iteritems():
750 750 yield f, sn
751 751
752 752 iteritems = items
753 753
754 754 def iterkeys(self):
755 755 self._load()
756 756 for p in sorted(itertools.chain(self._dirs, self._files)):
757 757 if p in self._files:
758 758 yield self._subpath(p)
759 759 else:
760 760 for f in self._dirs[p]:
761 761 yield f
762 762
763 763 def keys(self):
764 764 return list(self.iterkeys())
765 765
766 766 def __iter__(self):
767 767 return self.iterkeys()
768 768
769 769 def __contains__(self, f):
770 770 if f is None:
771 771 return False
772 772 self._load()
773 773 dir, subpath = _splittopdir(f)
774 774 if dir:
775 775 if dir not in self._dirs:
776 776 return False
777 777 return self._dirs[dir].__contains__(subpath)
778 778 else:
779 779 return f in self._files
780 780
781 781 def get(self, f, default=None):
782 782 self._load()
783 783 dir, subpath = _splittopdir(f)
784 784 if dir:
785 785 if dir not in self._dirs:
786 786 return default
787 787 return self._dirs[dir].get(subpath, default)
788 788 else:
789 789 return self._files.get(f, default)
790 790
791 791 def __getitem__(self, f):
792 792 self._load()
793 793 dir, subpath = _splittopdir(f)
794 794 if dir:
795 795 return self._dirs[dir].__getitem__(subpath)
796 796 else:
797 797 return self._files[f]
798 798
799 799 def flags(self, f):
800 800 self._load()
801 801 dir, subpath = _splittopdir(f)
802 802 if dir:
803 803 if dir not in self._dirs:
804 804 return ''
805 805 return self._dirs[dir].flags(subpath)
806 806 else:
807 807 if f in self._dirs:
808 808 return ''
809 809 return self._flags.get(f, '')
810 810
811 811 def find(self, f):
812 812 self._load()
813 813 dir, subpath = _splittopdir(f)
814 814 if dir:
815 815 return self._dirs[dir].find(subpath)
816 816 else:
817 817 return self._files[f], self._flags.get(f, '')
818 818
819 819 def __delitem__(self, f):
820 820 self._load()
821 821 dir, subpath = _splittopdir(f)
822 822 if dir:
823 823 self._dirs[dir].__delitem__(subpath)
824 824 # If the directory is now empty, remove it
825 825 if self._dirs[dir]._isempty():
826 826 del self._dirs[dir]
827 827 else:
828 828 del self._files[f]
829 829 if f in self._flags:
830 830 del self._flags[f]
831 831 self._dirty = True
832 832
833 833 def __setitem__(self, f, n):
834 834 assert n is not None
835 835 self._load()
836 836 dir, subpath = _splittopdir(f)
837 837 if dir:
838 838 if dir not in self._dirs:
839 839 self._dirs[dir] = treemanifest(self._subpath(dir))
840 840 self._dirs[dir].__setitem__(subpath, n)
841 841 else:
842 842 self._files[f] = n[:21] # to match manifestdict's behavior
843 843 self._dirty = True
844 844
845 845 def _load(self):
846 846 if self._loadfunc is not _noop:
847 847 lf, self._loadfunc = self._loadfunc, _noop
848 848 lf(self)
849 849 elif self._copyfunc is not _noop:
850 850 cf, self._copyfunc = self._copyfunc, _noop
851 851 cf(self)
852 852
853 853 def setflag(self, f, flags):
854 854 """Set the flags (symlink, executable) for path f."""
855 855 self._load()
856 856 dir, subpath = _splittopdir(f)
857 857 if dir:
858 858 if dir not in self._dirs:
859 859 self._dirs[dir] = treemanifest(self._subpath(dir))
860 860 self._dirs[dir].setflag(subpath, flags)
861 861 else:
862 862 self._flags[f] = flags
863 863 self._dirty = True
864 864
865 865 def copy(self):
866 866 copy = treemanifest(self._dir)
867 867 copy._node = self._node
868 868 copy._dirty = self._dirty
869 869 if self._copyfunc is _noop:
870 870 def _copyfunc(s):
871 871 self._load()
872 872 for d in self._dirs:
873 873 s._dirs[d] = self._dirs[d].copy()
874 874 s._files = dict.copy(self._files)
875 875 s._flags = dict.copy(self._flags)
876 876 if self._loadfunc is _noop:
877 877 _copyfunc(copy)
878 878 else:
879 879 copy._copyfunc = _copyfunc
880 880 else:
881 881 copy._copyfunc = self._copyfunc
882 882 return copy
883 883
884 884 def filesnotin(self, m2, match=None):
885 885 '''Set of files in this manifest that are not in the other'''
886 886 if match:
887 887 m1 = self.matches(match)
888 888 m2 = m2.matches(match)
889 889 return m1.filesnotin(m2)
890 890
891 891 files = set()
892 892 def _filesnotin(t1, t2):
893 893 if t1._node == t2._node and not t1._dirty and not t2._dirty:
894 894 return
895 895 t1._load()
896 896 t2._load()
897 897 for d, m1 in t1._dirs.iteritems():
898 898 if d in t2._dirs:
899 899 m2 = t2._dirs[d]
900 900 _filesnotin(m1, m2)
901 901 else:
902 902 files.update(m1.iterkeys())
903 903
904 904 for fn in t1._files:
905 905 if fn not in t2._files:
906 906 files.add(t1._subpath(fn))
907 907
908 908 _filesnotin(self, m2)
909 909 return files
910 910
911 911 @propertycache
912 912 def _alldirs(self):
913 913 return util.dirs(self)
914 914
915 915 def dirs(self):
916 916 return self._alldirs
917 917
918 918 def hasdir(self, dir):
919 919 self._load()
920 920 topdir, subdir = _splittopdir(dir)
921 921 if topdir:
922 922 if topdir in self._dirs:
923 923 return self._dirs[topdir].hasdir(subdir)
924 924 return False
925 925 return (dir + '/') in self._dirs
926 926
927 927 def walk(self, match):
928 928 '''Generates matching file names.
929 929
930 930 Equivalent to manifest.matches(match).iterkeys(), but without creating
931 931 an entirely new manifest.
932 932
933 933 It also reports nonexistent files by marking them bad with match.bad().
934 934 '''
935 935 if match.always():
936 936 for f in iter(self):
937 937 yield f
938 938 return
939 939
940 940 fset = set(match.files())
941 941
942 942 for fn in self._walk(match):
943 943 if fn in fset:
944 944 # specified pattern is the exact name
945 945 fset.remove(fn)
946 946 yield fn
947 947
948 948 # for dirstate.walk, files=['.'] means "walk the whole tree".
949 949 # follow that here, too
950 950 fset.discard('.')
951 951
952 952 for fn in sorted(fset):
953 953 if not self.hasdir(fn):
954 954 match.bad(fn, None)
955 955
956 956 def _walk(self, match):
957 957 '''Recursively generates matching file names for walk().'''
958 958 if not match.visitdir(self._dir[:-1] or '.'):
959 959 return
960 960
961 961 # yield this dir's files and walk its submanifests
962 962 self._load()
963 963 for p in sorted(list(self._dirs) + list(self._files)):
964 964 if p in self._files:
965 965 fullp = self._subpath(p)
966 966 if match(fullp):
967 967 yield fullp
968 968 else:
969 969 for f in self._dirs[p]._walk(match):
970 970 yield f
971 971
972 972 def matches(self, match):
973 973 '''generate a new manifest filtered by the match argument'''
974 974 if match.always():
975 975 return self.copy()
976 976
977 977 return self._matches(match)
978 978
979 979 def _matches(self, match):
980 980 '''recursively generate a new manifest filtered by the match argument.
981 981 '''
982 982
983 983 visit = match.visitdir(self._dir[:-1] or '.')
984 984 if visit == 'all':
985 985 return self.copy()
986 986 ret = treemanifest(self._dir)
987 987 if not visit:
988 988 return ret
989 989
990 990 self._load()
991 991 for fn in self._files:
992 992 fullp = self._subpath(fn)
993 993 if not match(fullp):
994 994 continue
995 995 ret._files[fn] = self._files[fn]
996 996 if fn in self._flags:
997 997 ret._flags[fn] = self._flags[fn]
998 998
999 999 for dir, subm in self._dirs.iteritems():
1000 1000 m = subm._matches(match)
1001 1001 if not m._isempty():
1002 1002 ret._dirs[dir] = m
1003 1003
1004 1004 if not ret._isempty():
1005 1005 ret._dirty = True
1006 1006 return ret
1007 1007
1008 1008 def diff(self, m2, match=None, clean=False):
1009 1009 '''Finds changes between the current manifest and m2.
1010 1010
1011 1011 Args:
1012 1012 m2: the manifest to which this manifest should be compared.
1013 1013 clean: if true, include files unchanged between these manifests
1014 1014 with a None value in the returned dictionary.
1015 1015
1016 1016 The result is returned as a dict with filename as key and
1017 1017 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1018 1018 nodeid in the current/other manifest and fl1/fl2 is the flag
1019 1019 in the current/other manifest. Where the file does not exist,
1020 1020 the nodeid will be None and the flags will be the empty
1021 1021 string.
1022 1022 '''
1023 1023 if match:
1024 1024 m1 = self.matches(match)
1025 1025 m2 = m2.matches(match)
1026 1026 return m1.diff(m2, clean=clean)
1027 1027 result = {}
1028 1028 emptytree = treemanifest()
1029 1029 def _diff(t1, t2):
1030 1030 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1031 1031 return
1032 1032 t1._load()
1033 1033 t2._load()
1034 1034 for d, m1 in t1._dirs.iteritems():
1035 1035 m2 = t2._dirs.get(d, emptytree)
1036 1036 _diff(m1, m2)
1037 1037
1038 1038 for d, m2 in t2._dirs.iteritems():
1039 1039 if d not in t1._dirs:
1040 1040 _diff(emptytree, m2)
1041 1041
1042 1042 for fn, n1 in t1._files.iteritems():
1043 1043 fl1 = t1._flags.get(fn, '')
1044 1044 n2 = t2._files.get(fn, None)
1045 1045 fl2 = t2._flags.get(fn, '')
1046 1046 if n1 != n2 or fl1 != fl2:
1047 1047 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1048 1048 elif clean:
1049 1049 result[t1._subpath(fn)] = None
1050 1050
1051 1051 for fn, n2 in t2._files.iteritems():
1052 1052 if fn not in t1._files:
1053 1053 fl2 = t2._flags.get(fn, '')
1054 1054 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
1055 1055
1056 1056 _diff(self, m2)
1057 1057 return result
1058 1058
1059 1059 def unmodifiedsince(self, m2):
1060 1060 return not self._dirty and not m2._dirty and self._node == m2._node
1061 1061
1062 1062 def parse(self, text, readsubtree):
1063 1063 for f, n, fl in _parse(text):
1064 1064 if fl == 't':
1065 1065 f = f + '/'
1066 1066 self._dirs[f] = readsubtree(self._subpath(f), n)
1067 1067 elif '/' in f:
1068 1068 # This is a flat manifest, so use __setitem__ and setflag rather
1069 1069 # than assigning directly to _files and _flags, so we can
1070 1070 # assign a path in a subdirectory, and to mark dirty (compared
1071 1071 # to nullid).
1072 1072 self[f] = n
1073 1073 if fl:
1074 1074 self.setflag(f, fl)
1075 1075 else:
1076 1076 # Assigning to _files and _flags avoids marking as dirty,
1077 1077 # and should be a little faster.
1078 1078 self._files[f] = n
1079 1079 if fl:
1080 1080 self._flags[f] = fl
1081 1081
1082 1082 def text(self):
1083 1083 """Get the full data of this manifest as a bytestring."""
1084 1084 self._load()
1085 1085 return _text(self.iterentries())
1086 1086
1087 1087 def dirtext(self):
1088 1088 """Get the full data of this directory as a bytestring. Make sure that
1089 1089 any submanifests have been written first, so their nodeids are correct.
1090 1090 """
1091 1091 self._load()
1092 1092 flags = self.flags
1093 1093 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1094 1094 files = [(f, self._files[f], flags(f)) for f in self._files]
1095 1095 return _text(sorted(dirs + files))
1096 1096
1097 1097 def read(self, gettext, readsubtree):
1098 1098 def _load_for_read(s):
1099 1099 s.parse(gettext(), readsubtree)
1100 1100 s._dirty = False
1101 1101 self._loadfunc = _load_for_read
1102 1102
1103 1103 def writesubtrees(self, m1, m2, writesubtree):
1104 1104 self._load() # for consistency; should never have any effect here
1105 1105 m1._load()
1106 1106 m2._load()
1107 1107 emptytree = treemanifest()
1108 1108 for d, subm in self._dirs.iteritems():
1109 1109 subp1 = m1._dirs.get(d, emptytree)._node
1110 1110 subp2 = m2._dirs.get(d, emptytree)._node
1111 1111 if subp1 == revlog.nullid:
1112 1112 subp1, subp2 = subp2, subp1
1113 1113 writesubtree(subm, subp1, subp2)
1114 1114
1115 1115 def walksubtrees(self, matcher=None):
1116 1116 """Returns an iterator of the subtrees of this manifest, including this
1117 1117 manifest itself.
1118 1118
1119 1119 If `matcher` is provided, it only returns subtrees that match.
1120 1120 """
1121 1121 if matcher and not matcher.visitdir(self._dir[:-1] or '.'):
1122 1122 return
1123 1123 if not matcher or matcher(self._dir[:-1]):
1124 1124 yield self
1125 1125
1126 1126 self._load()
1127 1127 for d, subm in self._dirs.iteritems():
1128 1128 for subtree in subm.walksubtrees(matcher=matcher):
1129 1129 yield subtree
1130 1130
1131 1131 class manifestrevlog(revlog.revlog):
1132 1132 '''A revlog that stores manifest texts. This is responsible for caching the
1133 1133 full-text manifest contents.
1134 1134 '''
1135 1135 def __init__(self, opener, dir='', dirlogcache=None, indexfile=None,
1136 1136 treemanifest=False):
1137 1137 """Constructs a new manifest revlog
1138 1138
1139 1139 `indexfile` - used by extensions to have two manifests at once, like
1140 1140 when transitioning between flatmanifeset and treemanifests.
1141 1141
1142 1142 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1143 1143 options can also be used to make this a tree manifest revlog. The opener
1144 1144 option takes precedence, so if it is set to True, we ignore whatever
1145 1145 value is passed in to the constructor.
1146 1146 """
1147 1147 # During normal operations, we expect to deal with not more than four
1148 1148 # revs at a time (such as during commit --amend). When rebasing large
1149 1149 # stacks of commits, the number can go up, hence the config knob below.
1150 1150 cachesize = 4
1151 1151 optiontreemanifest = False
1152 1152 opts = getattr(opener, 'options', None)
1153 1153 if opts is not None:
1154 1154 cachesize = opts.get('manifestcachesize', cachesize)
1155 1155 optiontreemanifest = opts.get('treemanifest', False)
1156 1156
1157 1157 self._treeondisk = optiontreemanifest or treemanifest
1158 1158
1159 1159 self._fulltextcache = util.lrucachedict(cachesize)
1160 1160
1161 1161 if dir:
1162 1162 assert self._treeondisk, 'opts is %r' % opts
1163 1163
1164 1164 if indexfile is None:
1165 1165 indexfile = '00manifest.i'
1166 1166 if dir:
1167 1167 indexfile = "meta/" + dir + indexfile
1168 1168
1169 1169 self._dir = dir
1170 1170 # The dirlogcache is kept on the root manifest log
1171 1171 if dir:
1172 1172 self._dirlogcache = dirlogcache
1173 1173 else:
1174 1174 self._dirlogcache = {'': self}
1175 1175
1176 1176 super(manifestrevlog, self).__init__(opener, indexfile,
1177 1177 # only root indexfile is cached
1178 1178 checkambig=not bool(dir),
1179 1179 mmaplargeindex=True)
1180 1180
1181 1181 @property
1182 1182 def fulltextcache(self):
1183 1183 return self._fulltextcache
1184 1184
1185 1185 def clearcaches(self):
1186 1186 super(manifestrevlog, self).clearcaches()
1187 1187 self._fulltextcache.clear()
1188 1188 self._dirlogcache = {'': self}
1189 1189
1190 1190 def dirlog(self, d):
1191 1191 if d:
1192 1192 assert self._treeondisk
1193 1193 if d not in self._dirlogcache:
1194 1194 mfrevlog = manifestrevlog(self.opener, d,
1195 1195 self._dirlogcache,
1196 1196 treemanifest=self._treeondisk)
1197 1197 self._dirlogcache[d] = mfrevlog
1198 1198 return self._dirlogcache[d]
1199 1199
1200 1200 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
1201 1201 if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
1202 1202 # If our first parent is in the manifest cache, we can
1203 1203 # compute a delta here using properties we know about the
1204 1204 # manifest up-front, which may save time later for the
1205 1205 # revlog layer.
1206 1206
1207 1207 _checkforbidden(added)
1208 1208 # combine the changed lists into one sorted iterator
1209 1209 work = heapq.merge([(x, False) for x in added],
1210 1210 [(x, True) for x in removed])
1211 1211
1212 1212 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1213 1213 cachedelta = self.rev(p1), deltatext
1214 1214 text = util.buffer(arraytext)
1215 1215 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
1216 1216 else:
1217 1217 # The first parent manifest isn't already loaded, so we'll
1218 1218 # just encode a fulltext of the manifest and pass that
1219 1219 # through to the revlog layer, and let it handle the delta
1220 1220 # process.
1221 1221 if self._treeondisk:
1222 1222 assert readtree, "readtree must be set for treemanifest writes"
1223 1223 m1 = readtree(self._dir, p1)
1224 1224 m2 = readtree(self._dir, p2)
1225 1225 n = self._addtree(m, transaction, link, m1, m2, readtree)
1226 1226 arraytext = None
1227 1227 else:
1228 1228 text = m.text()
1229 1229 n = self.addrevision(text, transaction, link, p1, p2)
1230 1230 arraytext = bytearray(text)
1231 1231
1232 1232 if arraytext is not None:
1233 1233 self.fulltextcache[n] = arraytext
1234 1234
1235 1235 return n
1236 1236
1237 1237 def _addtree(self, m, transaction, link, m1, m2, readtree):
1238 1238 # If the manifest is unchanged compared to one parent,
1239 1239 # don't write a new revision
1240 1240 if self._dir != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(m2)):
1241 1241 return m.node()
1242 1242 def writesubtree(subm, subp1, subp2):
1243 1243 sublog = self.dirlog(subm.dir())
1244 1244 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1245 1245 readtree=readtree)
1246 1246 m.writesubtrees(m1, m2, writesubtree)
1247 1247 text = m.dirtext()
1248 1248 n = None
1249 1249 if self._dir != '':
1250 1250 # Double-check whether contents are unchanged to one parent
1251 1251 if text == m1.dirtext():
1252 1252 n = m1.node()
1253 1253 elif text == m2.dirtext():
1254 1254 n = m2.node()
1255 1255
1256 1256 if not n:
1257 1257 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1258 1258
1259 1259 # Save nodeid so parent manifest can calculate its nodeid
1260 1260 m.setnode(n)
1261 1261 return n
1262 1262
1263 1263 class manifestlog(object):
1264 1264 """A collection class representing the collection of manifest snapshots
1265 1265 referenced by commits in the repository.
1266 1266
1267 1267 In this situation, 'manifest' refers to the abstract concept of a snapshot
1268 1268 of the list of files in the given commit. Consumers of the output of this
1269 1269 class do not care about the implementation details of the actual manifests
1270 1270 they receive (i.e. tree or flat or lazily loaded, etc)."""
1271 1271 def __init__(self, opener, repo):
1272 1272 usetreemanifest = False
1273 1273 cachesize = 4
1274 1274
1275 1275 opts = getattr(opener, 'options', None)
1276 1276 if opts is not None:
1277 1277 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1278 1278 cachesize = opts.get('manifestcachesize', cachesize)
1279 1279 self._treeinmem = usetreemanifest
1280 1280
1281 1281 self._revlog = repo._constructmanifest()
1282 1282
1283 1283 # A cache of the manifestctx or treemanifestctx for each directory
1284 1284 self._dirmancache = {}
1285 1285 self._dirmancache[''] = util.lrucachedict(cachesize)
1286 1286
1287 1287 self.cachesize = cachesize
1288 1288
1289 1289 def __getitem__(self, node):
1290 1290 """Retrieves the manifest instance for the given node. Throws a
1291 1291 LookupError if not found.
1292 1292 """
1293 1293 return self.get('', node)
1294 1294
1295 1295 def get(self, dir, node, verify=True):
1296 1296 """Retrieves the manifest instance for the given node. Throws a
1297 1297 LookupError if not found.
1298 1298
1299 1299 `verify` - if True an exception will be thrown if the node is not in
1300 1300 the revlog
1301 1301 """
1302 1302 if node in self._dirmancache.get(dir, ()):
1303 1303 return self._dirmancache[dir][node]
1304 1304
1305 1305 if dir:
1306 1306 if self._revlog._treeondisk:
1307 1307 if verify:
1308 1308 dirlog = self._revlog.dirlog(dir)
1309 1309 if node not in dirlog.nodemap:
1310 1310 raise LookupError(node, dirlog.indexfile,
1311 1311 _('no node'))
1312 1312 m = treemanifestctx(self, dir, node)
1313 1313 else:
1314 1314 raise error.Abort(
1315 1315 _("cannot ask for manifest directory '%s' in a flat "
1316 1316 "manifest") % dir)
1317 1317 else:
1318 1318 if verify:
1319 1319 if node not in self._revlog.nodemap:
1320 1320 raise LookupError(node, self._revlog.indexfile,
1321 1321 _('no node'))
1322 1322 if self._treeinmem:
1323 1323 m = treemanifestctx(self, '', node)
1324 1324 else:
1325 1325 m = manifestctx(self, node)
1326 1326
1327 1327 if node != revlog.nullid:
1328 1328 mancache = self._dirmancache.get(dir)
1329 1329 if not mancache:
1330 1330 mancache = util.lrucachedict(self.cachesize)
1331 1331 self._dirmancache[dir] = mancache
1332 1332 mancache[node] = m
1333 1333 return m
1334 1334
1335 1335 def clearcaches(self):
1336 1336 self._dirmancache.clear()
1337 1337 self._revlog.clearcaches()
1338 1338
1339 1339 class memmanifestctx(object):
1340 1340 def __init__(self, manifestlog):
1341 1341 self._manifestlog = manifestlog
1342 1342 self._manifestdict = manifestdict()
1343 1343
1344 1344 def _revlog(self):
1345 1345 return self._manifestlog._revlog
1346 1346
1347 1347 def new(self):
1348 1348 return memmanifestctx(self._manifestlog)
1349 1349
1350 1350 def copy(self):
1351 1351 memmf = memmanifestctx(self._manifestlog)
1352 1352 memmf._manifestdict = self.read().copy()
1353 1353 return memmf
1354 1354
1355 1355 def read(self):
1356 1356 return self._manifestdict
1357 1357
1358 1358 def write(self, transaction, link, p1, p2, added, removed):
1359 1359 return self._revlog().add(self._manifestdict, transaction, link, p1, p2,
1360 1360 added, removed)
1361 1361
1362 1362 class manifestctx(object):
1363 1363 """A class representing a single revision of a manifest, including its
1364 1364 contents, its parent revs, and its linkrev.
1365 1365 """
1366 1366 def __init__(self, manifestlog, node):
1367 1367 self._manifestlog = manifestlog
1368 1368 self._data = None
1369 1369
1370 1370 self._node = node
1371 1371
1372 1372 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1373 1373 # but let's add it later when something needs it and we can load it
1374 1374 # lazily.
1375 1375 #self.p1, self.p2 = revlog.parents(node)
1376 1376 #rev = revlog.rev(node)
1377 1377 #self.linkrev = revlog.linkrev(rev)
1378 1378
1379 1379 def _revlog(self):
1380 1380 return self._manifestlog._revlog
1381 1381
1382 1382 def node(self):
1383 1383 return self._node
1384 1384
1385 1385 def new(self):
1386 1386 return memmanifestctx(self._manifestlog)
1387 1387
1388 1388 def copy(self):
1389 1389 memmf = memmanifestctx(self._manifestlog)
1390 1390 memmf._manifestdict = self.read().copy()
1391 1391 return memmf
1392 1392
1393 1393 @propertycache
1394 1394 def parents(self):
1395 1395 return self._revlog().parents(self._node)
1396 1396
1397 1397 def read(self):
1398 1398 if self._data is None:
1399 1399 if self._node == revlog.nullid:
1400 1400 self._data = manifestdict()
1401 1401 else:
1402 1402 rl = self._revlog()
1403 1403 text = rl.revision(self._node)
1404 1404 arraytext = bytearray(text)
1405 1405 rl._fulltextcache[self._node] = arraytext
1406 1406 self._data = manifestdict(text)
1407 1407 return self._data
1408 1408
1409 1409 def readfast(self, shallow=False):
1410 1410 '''Calls either readdelta or read, based on which would be less work.
1411 1411 readdelta is called if the delta is against the p1, and therefore can be
1412 1412 read quickly.
1413 1413
1414 1414 If `shallow` is True, nothing changes since this is a flat manifest.
1415 1415 '''
1416 1416 rl = self._revlog()
1417 1417 r = rl.rev(self._node)
1418 1418 deltaparent = rl.deltaparent(r)
1419 1419 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1420 1420 return self.readdelta()
1421 1421 return self.read()
1422 1422
1423 1423 def readdelta(self, shallow=False):
1424 1424 '''Returns a manifest containing just the entries that are present
1425 1425 in this manifest, but not in its p1 manifest. This is efficient to read
1426 1426 if the revlog delta is already p1.
1427 1427
1428 1428 Changing the value of `shallow` has no effect on flat manifests.
1429 1429 '''
1430 1430 revlog = self._revlog()
1431 1431 r = revlog.rev(self._node)
1432 1432 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1433 1433 return manifestdict(d)
1434 1434
1435 1435 def find(self, key):
1436 1436 return self.read().find(key)
1437 1437
1438 1438 class memtreemanifestctx(object):
1439 1439 def __init__(self, manifestlog, dir=''):
1440 1440 self._manifestlog = manifestlog
1441 1441 self._dir = dir
1442 1442 self._treemanifest = treemanifest()
1443 1443
1444 1444 def _revlog(self):
1445 1445 return self._manifestlog._revlog
1446 1446
1447 1447 def new(self, dir=''):
1448 1448 return memtreemanifestctx(self._manifestlog, dir=dir)
1449 1449
1450 1450 def copy(self):
1451 1451 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1452 1452 memmf._treemanifest = self._treemanifest.copy()
1453 1453 return memmf
1454 1454
1455 1455 def read(self):
1456 1456 return self._treemanifest
1457 1457
1458 1458 def write(self, transaction, link, p1, p2, added, removed):
1459 1459 def readtree(dir, node):
1460 1460 return self._manifestlog.get(dir, node).read()
1461 1461 return self._revlog().add(self._treemanifest, transaction, link, p1, p2,
1462 1462 added, removed, readtree=readtree)
1463 1463
1464 1464 class treemanifestctx(object):
1465 1465 def __init__(self, manifestlog, dir, node):
1466 1466 self._manifestlog = manifestlog
1467 1467 self._dir = dir
1468 1468 self._data = None
1469 1469
1470 1470 self._node = node
1471 1471
1472 1472 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1473 1473 # we can instantiate treemanifestctx objects for directories we don't
1474 1474 # have on disk.
1475 1475 #self.p1, self.p2 = revlog.parents(node)
1476 1476 #rev = revlog.rev(node)
1477 1477 #self.linkrev = revlog.linkrev(rev)
1478 1478
1479 1479 def _revlog(self):
1480 1480 return self._manifestlog._revlog.dirlog(self._dir)
1481 1481
1482 1482 def read(self):
1483 1483 if self._data is None:
1484 1484 rl = self._revlog()
1485 1485 if self._node == revlog.nullid:
1486 1486 self._data = treemanifest()
1487 1487 elif rl._treeondisk:
1488 1488 m = treemanifest(dir=self._dir)
1489 1489 def gettext():
1490 1490 return rl.revision(self._node)
1491 1491 def readsubtree(dir, subm):
1492 1492 # Set verify to False since we need to be able to create
1493 1493 # subtrees for trees that don't exist on disk.
1494 1494 return self._manifestlog.get(dir, subm, verify=False).read()
1495 1495 m.read(gettext, readsubtree)
1496 1496 m.setnode(self._node)
1497 1497 self._data = m
1498 1498 else:
1499 1499 text = rl.revision(self._node)
1500 1500 arraytext = bytearray(text)
1501 1501 rl.fulltextcache[self._node] = arraytext
1502 1502 self._data = treemanifest(dir=self._dir, text=text)
1503 1503
1504 1504 return self._data
1505 1505
1506 1506 def node(self):
1507 1507 return self._node
1508 1508
1509 1509 def new(self, dir=''):
1510 1510 return memtreemanifestctx(self._manifestlog, dir=dir)
1511 1511
1512 1512 def copy(self):
1513 1513 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1514 1514 memmf._treemanifest = self.read().copy()
1515 1515 return memmf
1516 1516
1517 1517 @propertycache
1518 1518 def parents(self):
1519 1519 return self._revlog().parents(self._node)
1520 1520
1521 1521 def readdelta(self, shallow=False):
1522 1522 '''Returns a manifest containing just the entries that are present
1523 1523 in this manifest, but not in its p1 manifest. This is efficient to read
1524 1524 if the revlog delta is already p1.
1525 1525
1526 1526 If `shallow` is True, this will read the delta for this directory,
1527 1527 without recursively reading subdirectory manifests. Instead, any
1528 1528 subdirectory entry will be reported as it appears in the manifest, i.e.
1529 1529 the subdirectory will be reported among files and distinguished only by
1530 1530 its 't' flag.
1531 1531 '''
1532 1532 revlog = self._revlog()
1533 1533 if shallow:
1534 1534 r = revlog.rev(self._node)
1535 1535 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1536 1536 return manifestdict(d)
1537 1537 else:
1538 1538 # Need to perform a slow delta
1539 1539 r0 = revlog.deltaparent(revlog.rev(self._node))
1540 1540 m0 = self._manifestlog.get(self._dir, revlog.node(r0)).read()
1541 1541 m1 = self.read()
1542 1542 md = treemanifest(dir=self._dir)
1543 1543 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1544 1544 if n1:
1545 1545 md[f] = n1
1546 1546 if fl1:
1547 1547 md.setflag(f, fl1)
1548 1548 return md
1549 1549
1550 1550 def readfast(self, shallow=False):
1551 1551 '''Calls either readdelta or read, based on which would be less work.
1552 1552 readdelta is called if the delta is against the p1, and therefore can be
1553 1553 read quickly.
1554 1554
1555 1555 If `shallow` is True, it only returns the entries from this manifest,
1556 1556 and not any submanifests.
1557 1557 '''
1558 1558 rl = self._revlog()
1559 1559 r = rl.rev(self._node)
1560 1560 deltaparent = rl.deltaparent(r)
1561 1561 if (deltaparent != revlog.nullrev and
1562 1562 deltaparent in rl.parentrevs(r)):
1563 1563 return self.readdelta(shallow=shallow)
1564 1564
1565 1565 if shallow:
1566 1566 return manifestdict(rl.revision(self._node))
1567 1567 else:
1568 1568 return self.read()
1569 1569
1570 1570 def find(self, key):
1571 1571 return self.read().find(key)
1572
1573 class excludeddir(treemanifest):
1574 """Stand-in for a directory that is excluded from the repository.
1575
1576 With narrowing active on a repository that uses treemanifests,
1577 some of the directory revlogs will be excluded from the resulting
1578 clone. This is a huge storage win for clients, but means we need
1579 some sort of pseudo-manifest to surface to internals so we can
1580 detect a merge conflict outside the narrowspec. That's what this
1581 class is: it stands in for a directory whose node is known, but
1582 whose contents are unknown.
1583 """
1584 def __init__(self, dir, node):
1585 super(excludeddir, self).__init__(dir)
1586 self._node = node
1587 # Add an empty file, which will be included by iterators and such,
1588 # appearing as the directory itself (i.e. something like "dir/")
1589 self._files[''] = node
1590 self._flags[''] = 't'
1591
1592 # Manifests outside the narrowspec should never be modified, so avoid
1593 # copying. This makes a noticeable difference when there are very many
1594 # directories outside the narrowspec. Also, it makes sense for the copy to
1595 # be of the same type as the original, which would not happen with the
1596 # super type's copy().
1597 def copy(self):
1598 return self
1599
1600 class excludeddirmanifestctx(treemanifestctx):
1601 """context wrapper for excludeddir - see that docstring for rationale"""
1602 def __init__(self, dir, node):
1603 self._dir = dir
1604 self._node = node
1605
1606 def read(self):
1607 return excludeddir(self._dir, self._node)
1608
1609 def write(self, *args):
1610 raise error.ProgrammingError(
1611 'attempt to write manifest from excluded dir %s' % self._dir)
1612
1613 class excludedmanifestrevlog(manifestrevlog):
1614 """Stand-in for excluded treemanifest revlogs.
1615
1616 When narrowing is active on a treemanifest repository, we'll have
1617 references to directories we can't see due to the revlog being
1618 skipped. This class exists to conform to the manifestrevlog
1619 interface for those directories and proactively prevent writes to
1620 outside the narrowspec.
1621 """
1622
1623 def __init__(self, dir):
1624 self._dir = dir
1625
1626 def __len__(self):
1627 raise error.ProgrammingError(
1628 'attempt to get length of excluded dir %s' % self._dir)
1629
1630 def rev(self, node):
1631 raise error.ProgrammingError(
1632 'attempt to get rev from excluded dir %s' % self._dir)
1633
1634 def linkrev(self, node):
1635 raise error.ProgrammingError(
1636 'attempt to get linkrev from excluded dir %s' % self._dir)
1637
1638 def node(self, rev):
1639 raise error.ProgrammingError(
1640 'attempt to get node from excluded dir %s' % self._dir)
1641
1642 def add(self, *args, **kwargs):
1643 # We should never write entries in dirlogs outside the narrow clone.
1644 # However, the method still gets called from writesubtree() in
1645 # _addtree(), so we need to handle it. We should possibly make that
1646 # avoid calling add() with a clean manifest (_dirty is always False
1647 # in excludeddir instances).
1648 pass
General Comments 0
You need to be logged in to leave comments. Login now