##// END OF EJS Templates
narrow: move manifestrevlog overrides to core...
Martin von Zweigbergk -
r37391:c50078fc default
parent child Browse files
Show More
@@ -1,86 +1,81 b''
1 1 # narrowrepo.py - repository which supports narrow revlogs, lazy loading
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial import (
11 11 bundlerepo,
12 12 changegroup,
13 13 hg,
14 14 localrepo,
15 15 narrowspec,
16 16 scmutil,
17 17 )
18 18
19 19 from . import (
20 20 narrowrevlog,
21 21 )
22 22
23 23 def wrappostshare(orig, sourcerepo, destrepo, **kwargs):
24 24 orig(sourcerepo, destrepo, **kwargs)
25 25 if changegroup.NARROW_REQUIREMENT in sourcerepo.requirements:
26 26 with destrepo.wlock():
27 27 with destrepo.vfs('shared', 'a') as fp:
28 28 fp.write(narrowspec.FILENAME + '\n')
29 29
30 30 def unsharenarrowspec(orig, ui, repo, repopath):
31 31 if (changegroup.NARROW_REQUIREMENT in repo.requirements
32 32 and repo.path == repopath and repo.shared()):
33 33 srcrepo = hg.sharedreposource(repo)
34 34 with srcrepo.vfs(narrowspec.FILENAME) as f:
35 35 spec = f.read()
36 36 with repo.vfs(narrowspec.FILENAME, 'w') as f:
37 37 f.write(spec)
38 38 return orig(ui, repo, repopath)
39 39
40 40 def wraprepo(repo):
41 41 """Enables narrow clone functionality on a single local repository."""
42 42
43 43 cacheprop = localrepo.storecache
44 44 if isinstance(repo, bundlerepo.bundlerepository):
45 45 # We have to use a different caching property decorator for
46 46 # bundlerepo because storecache blows up in strange ways on a
47 47 # bundlerepo. Fortunately, there's no risk of data changing in
48 48 # a bundlerepo.
49 49 cacheprop = lambda name: localrepo.unfilteredpropertycache
50 50
51 51 class narrowrepository(repo.__class__):
52 52
53 def _constructmanifest(self):
54 manifest = super(narrowrepository, self)._constructmanifest()
55 narrowrevlog.makenarrowmanifestrevlog(manifest, repo)
56 return manifest
57
58 53 @cacheprop('00manifest.i')
59 54 def manifestlog(self):
60 55 mfl = super(narrowrepository, self).manifestlog
61 56 narrowrevlog.makenarrowmanifestlog(mfl, self)
62 57 return mfl
63 58
64 59 def file(self, f):
65 60 fl = super(narrowrepository, self).file(f)
66 61 narrowrevlog.makenarrowfilelog(fl, self.narrowmatch())
67 62 return fl
68 63
69 64 # I'm not sure this is the right place to do this filter.
70 65 # context._manifestmatches() would probably be better, or perhaps
71 66 # move it to a later place, in case some of the callers do want to know
72 67 # which directories changed. This seems to work for now, though.
73 68 def status(self, *args, **kwargs):
74 69 s = super(narrowrepository, self).status(*args, **kwargs)
75 70 narrowmatch = self.narrowmatch()
76 71 modified = list(filter(narrowmatch, s.modified))
77 72 added = list(filter(narrowmatch, s.added))
78 73 removed = list(filter(narrowmatch, s.removed))
79 74 deleted = list(filter(narrowmatch, s.deleted))
80 75 unknown = list(filter(narrowmatch, s.unknown))
81 76 ignored = list(filter(narrowmatch, s.ignored))
82 77 clean = list(filter(narrowmatch, s.clean))
83 78 return scmutil.status(modified, added, removed, deleted, unknown,
84 79 ignored, clean)
85 80
86 81 repo.__class__ = narrowrepository
@@ -1,107 +1,89 b''
1 1 # narrowrevlog.py - revlog storing irrelevant nodes as "ellipsis" nodes
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial import (
11 11 manifest,
12 12 revlog,
13 13 util,
14 14 )
15 15
16 16 def readtransform(self, text):
17 17 return text, False
18 18
19 19 def writetransform(self, text):
20 20 return text, False
21 21
22 22 def rawtransform(self, text):
23 23 return False
24 24
25 25 revlog.addflagprocessor(revlog.REVIDX_ELLIPSIS,
26 26 (readtransform, writetransform, rawtransform))
27 27
28 28 def setup():
29 29 # We just wanted to add the flag processor, which is done at module
30 30 # load time.
31 31 pass
32 32
33 def makenarrowmanifestrevlog(mfrevlog, repo):
34 if util.safehasattr(mfrevlog, '_narrowed'):
35 return
36
37 class narrowmanifestrevlog(mfrevlog.__class__):
38 # This function is called via debug{revlog,index,data}, but also during
39 # at least some push operations. This will be used to wrap/exclude the
40 # child directories when using treemanifests.
41 def dirlog(self, d):
42 if not repo.narrowmatch().visitdir(d[:-1] or '.'):
43 return manifest.excludedmanifestrevlog(d)
44 result = super(narrowmanifestrevlog, self).dirlog(d)
45 makenarrowmanifestrevlog(result, repo)
46 return result
47
48 mfrevlog.__class__ = narrowmanifestrevlog
49 mfrevlog._narrowed = True
50
51 33 def makenarrowmanifestlog(mfl, repo):
52 34 class narrowmanifestlog(mfl.__class__):
53 35 def get(self, dir, node, verify=True):
54 36 if not repo.narrowmatch().visitdir(dir[:-1] or '.'):
55 37 return manifest.excludeddirmanifestctx(dir, node)
56 38 return super(narrowmanifestlog, self).get(dir, node, verify=verify)
57 39 mfl.__class__ = narrowmanifestlog
58 40
59 41 def makenarrowfilelog(fl, narrowmatch):
60 42 class narrowfilelog(fl.__class__):
61 43 def renamed(self, node):
62 44 # Renames that come from outside the narrowspec are
63 45 # problematic at least for git-diffs, because we lack the
64 46 # base text for the rename. This logic was introduced in
65 47 # 3cd72b1 of narrowhg (authored by martinvonz, reviewed by
66 48 # adgar), but that revision doesn't have any additional
67 49 # commentary on what problems we can encounter.
68 50 m = super(narrowfilelog, self).renamed(node)
69 51 if m and not narrowmatch(m[0]):
70 52 return None
71 53 return m
72 54
73 55 def size(self, rev):
74 56 # We take advantage of the fact that remotefilelog
75 57 # lacks a node() method to just skip the
76 58 # rename-checking logic when on remotefilelog. This
77 59 # might be incorrect on other non-revlog-based storage
78 60 # engines, but for now this seems to be fine.
79 61 #
80 62 # TODO: when remotefilelog is in core, improve this to
81 63 # explicitly look for remotefilelog instead of cheating
82 64 # with a hasattr check.
83 65 if util.safehasattr(self, 'node'):
84 66 node = self.node(rev)
85 67 # Because renamed() is overridden above to
86 68 # sometimes return None even if there is metadata
87 69 # in the revlog, size can be incorrect for
88 70 # copies/renames, so we need to make sure we call
89 71 # the super class's implementation of renamed()
90 72 # for the purpose of size calculation.
91 73 if super(narrowfilelog, self).renamed(node):
92 74 return len(self.read(node))
93 75 return super(narrowfilelog, self).size(rev)
94 76
95 77 def cmp(self, node, text):
96 78 different = super(narrowfilelog, self).cmp(node, text)
97 79 if different:
98 80 # Similar to size() above, if the file was copied from
99 81 # a file outside the narrowspec, the super class's
100 82 # would have returned True because we tricked it into
101 83 # thinking that the file was not renamed.
102 84 if super(narrowfilelog, self).renamed(node):
103 85 t2 = self.read(node)
104 86 return t2 != text
105 87 return different
106 88
107 89 fl.__class__ = narrowfilelog
@@ -1,1648 +1,1653 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 bin,
17 17 hex,
18 18 )
19 19 from . import (
20 20 error,
21 21 mdiff,
22 22 policy,
23 23 revlog,
24 24 util,
25 25 )
26 26
27 27 parsers = policy.importmod(r'parsers')
28 28 propertycache = util.propertycache
29 29
30 30 def _parse(data):
31 31 # This method does a little bit of excessive-looking
32 32 # precondition checking. This is so that the behavior of this
33 33 # class exactly matches its C counterpart to try and help
34 34 # prevent surprise breakage for anyone that develops against
35 35 # the pure version.
36 36 if data and data[-1:] != '\n':
37 37 raise ValueError('Manifest did not end in a newline.')
38 38 prev = None
39 39 for l in data.splitlines():
40 40 if prev is not None and prev > l:
41 41 raise ValueError('Manifest lines not in sorted order.')
42 42 prev = l
43 43 f, n = l.split('\0')
44 44 if len(n) > 40:
45 45 yield f, bin(n[:40]), n[40:]
46 46 else:
47 47 yield f, bin(n), ''
48 48
49 49 def _text(it):
50 50 files = []
51 51 lines = []
52 52 _hex = revlog.hex
53 53 for f, n, fl in it:
54 54 files.append(f)
55 55 # if this is changed to support newlines in filenames,
56 56 # be sure to check the templates/ dir again (especially *-raw.tmpl)
57 57 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
58 58
59 59 _checkforbidden(files)
60 60 return ''.join(lines)
61 61
62 62 class lazymanifestiter(object):
63 63 def __init__(self, lm):
64 64 self.pos = 0
65 65 self.lm = lm
66 66
67 67 def __iter__(self):
68 68 return self
69 69
70 70 def next(self):
71 71 try:
72 72 data, pos = self.lm._get(self.pos)
73 73 except IndexError:
74 74 raise StopIteration
75 75 if pos == -1:
76 76 self.pos += 1
77 77 return data[0]
78 78 self.pos += 1
79 79 zeropos = data.find('\x00', pos)
80 80 return data[pos:zeropos]
81 81
82 82 __next__ = next
83 83
84 84 class lazymanifestiterentries(object):
85 85 def __init__(self, lm):
86 86 self.lm = lm
87 87 self.pos = 0
88 88
89 89 def __iter__(self):
90 90 return self
91 91
92 92 def next(self):
93 93 try:
94 94 data, pos = self.lm._get(self.pos)
95 95 except IndexError:
96 96 raise StopIteration
97 97 if pos == -1:
98 98 self.pos += 1
99 99 return data
100 100 zeropos = data.find('\x00', pos)
101 101 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
102 102 zeropos + 1, 40)
103 103 flags = self.lm._getflags(data, self.pos, zeropos)
104 104 self.pos += 1
105 105 return (data[pos:zeropos], hashval, flags)
106 106
107 107 __next__ = next
108 108
109 109 def unhexlify(data, extra, pos, length):
110 110 s = bin(data[pos:pos + length])
111 111 if extra:
112 112 s += chr(extra & 0xff)
113 113 return s
114 114
115 115 def _cmp(a, b):
116 116 return (a > b) - (a < b)
117 117
118 118 class _lazymanifest(object):
119 119 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
120 120 if positions is None:
121 121 self.positions = self.findlines(data)
122 122 self.extrainfo = [0] * len(self.positions)
123 123 self.data = data
124 124 self.extradata = []
125 125 else:
126 126 self.positions = positions[:]
127 127 self.extrainfo = extrainfo[:]
128 128 self.extradata = extradata[:]
129 129 self.data = data
130 130
131 131 def findlines(self, data):
132 132 if not data:
133 133 return []
134 134 pos = data.find("\n")
135 135 if pos == -1 or data[-1:] != '\n':
136 136 raise ValueError("Manifest did not end in a newline.")
137 137 positions = [0]
138 138 prev = data[:data.find('\x00')]
139 139 while pos < len(data) - 1 and pos != -1:
140 140 positions.append(pos + 1)
141 141 nexts = data[pos + 1:data.find('\x00', pos + 1)]
142 142 if nexts < prev:
143 143 raise ValueError("Manifest lines not in sorted order.")
144 144 prev = nexts
145 145 pos = data.find("\n", pos + 1)
146 146 return positions
147 147
148 148 def _get(self, index):
149 149 # get the position encoded in pos:
150 150 # positive number is an index in 'data'
151 151 # negative number is in extrapieces
152 152 pos = self.positions[index]
153 153 if pos >= 0:
154 154 return self.data, pos
155 155 return self.extradata[-pos - 1], -1
156 156
157 157 def _getkey(self, pos):
158 158 if pos >= 0:
159 159 return self.data[pos:self.data.find('\x00', pos + 1)]
160 160 return self.extradata[-pos - 1][0]
161 161
162 162 def bsearch(self, key):
163 163 first = 0
164 164 last = len(self.positions) - 1
165 165
166 166 while first <= last:
167 167 midpoint = (first + last)//2
168 168 nextpos = self.positions[midpoint]
169 169 candidate = self._getkey(nextpos)
170 170 r = _cmp(key, candidate)
171 171 if r == 0:
172 172 return midpoint
173 173 else:
174 174 if r < 0:
175 175 last = midpoint - 1
176 176 else:
177 177 first = midpoint + 1
178 178 return -1
179 179
180 180 def bsearch2(self, key):
181 181 # same as the above, but will always return the position
182 182 # done for performance reasons
183 183 first = 0
184 184 last = len(self.positions) - 1
185 185
186 186 while first <= last:
187 187 midpoint = (first + last)//2
188 188 nextpos = self.positions[midpoint]
189 189 candidate = self._getkey(nextpos)
190 190 r = _cmp(key, candidate)
191 191 if r == 0:
192 192 return (midpoint, True)
193 193 else:
194 194 if r < 0:
195 195 last = midpoint - 1
196 196 else:
197 197 first = midpoint + 1
198 198 return (first, False)
199 199
200 200 def __contains__(self, key):
201 201 return self.bsearch(key) != -1
202 202
203 203 def _getflags(self, data, needle, pos):
204 204 start = pos + 41
205 205 end = data.find("\n", start)
206 206 if end == -1:
207 207 end = len(data) - 1
208 208 if start == end:
209 209 return ''
210 210 return self.data[start:end]
211 211
212 212 def __getitem__(self, key):
213 213 if not isinstance(key, bytes):
214 214 raise TypeError("getitem: manifest keys must be a bytes.")
215 215 needle = self.bsearch(key)
216 216 if needle == -1:
217 217 raise KeyError
218 218 data, pos = self._get(needle)
219 219 if pos == -1:
220 220 return (data[1], data[2])
221 221 zeropos = data.find('\x00', pos)
222 222 assert 0 <= needle <= len(self.positions)
223 223 assert len(self.extrainfo) == len(self.positions)
224 224 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
225 225 flags = self._getflags(data, needle, zeropos)
226 226 return (hashval, flags)
227 227
228 228 def __delitem__(self, key):
229 229 needle, found = self.bsearch2(key)
230 230 if not found:
231 231 raise KeyError
232 232 cur = self.positions[needle]
233 233 self.positions = self.positions[:needle] + self.positions[needle + 1:]
234 234 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
235 235 if cur >= 0:
236 236 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
237 237
238 238 def __setitem__(self, key, value):
239 239 if not isinstance(key, bytes):
240 240 raise TypeError("setitem: manifest keys must be a byte string.")
241 241 if not isinstance(value, tuple) or len(value) != 2:
242 242 raise TypeError("Manifest values must be a tuple of (node, flags).")
243 243 hashval = value[0]
244 244 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
245 245 raise TypeError("node must be a 20-byte byte string")
246 246 flags = value[1]
247 247 if len(hashval) == 22:
248 248 hashval = hashval[:-1]
249 249 if not isinstance(flags, bytes) or len(flags) > 1:
250 250 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
251 251 needle, found = self.bsearch2(key)
252 252 if found:
253 253 # put the item
254 254 pos = self.positions[needle]
255 255 if pos < 0:
256 256 self.extradata[-pos - 1] = (key, hashval, value[1])
257 257 else:
258 258 # just don't bother
259 259 self.extradata.append((key, hashval, value[1]))
260 260 self.positions[needle] = -len(self.extradata)
261 261 else:
262 262 # not found, put it in with extra positions
263 263 self.extradata.append((key, hashval, value[1]))
264 264 self.positions = (self.positions[:needle] + [-len(self.extradata)]
265 265 + self.positions[needle:])
266 266 self.extrainfo = (self.extrainfo[:needle] + [0] +
267 267 self.extrainfo[needle:])
268 268
269 269 def copy(self):
270 270 # XXX call _compact like in C?
271 271 return _lazymanifest(self.data, self.positions, self.extrainfo,
272 272 self.extradata)
273 273
274 274 def _compact(self):
275 275 # hopefully not called TOO often
276 276 if len(self.extradata) == 0:
277 277 return
278 278 l = []
279 279 last_cut = 0
280 280 i = 0
281 281 offset = 0
282 282 self.extrainfo = [0] * len(self.positions)
283 283 while i < len(self.positions):
284 284 if self.positions[i] >= 0:
285 285 cur = self.positions[i]
286 286 last_cut = cur
287 287 while True:
288 288 self.positions[i] = offset
289 289 i += 1
290 290 if i == len(self.positions) or self.positions[i] < 0:
291 291 break
292 292 offset += self.positions[i] - cur
293 293 cur = self.positions[i]
294 294 end_cut = self.data.find('\n', cur)
295 295 if end_cut != -1:
296 296 end_cut += 1
297 297 offset += end_cut - cur
298 298 l.append(self.data[last_cut:end_cut])
299 299 else:
300 300 while i < len(self.positions) and self.positions[i] < 0:
301 301 cur = self.positions[i]
302 302 t = self.extradata[-cur - 1]
303 303 l.append(self._pack(t))
304 304 self.positions[i] = offset
305 305 if len(t[1]) > 20:
306 306 self.extrainfo[i] = ord(t[1][21])
307 307 offset += len(l[-1])
308 308 i += 1
309 309 self.data = ''.join(l)
310 310 self.extradata = []
311 311
312 312 def _pack(self, d):
313 313 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
314 314
315 315 def text(self):
316 316 self._compact()
317 317 return self.data
318 318
319 319 def diff(self, m2, clean=False):
320 320 '''Finds changes between the current manifest and m2.'''
321 321 # XXX think whether efficiency matters here
322 322 diff = {}
323 323
324 324 for fn, e1, flags in self.iterentries():
325 325 if fn not in m2:
326 326 diff[fn] = (e1, flags), (None, '')
327 327 else:
328 328 e2 = m2[fn]
329 329 if (e1, flags) != e2:
330 330 diff[fn] = (e1, flags), e2
331 331 elif clean:
332 332 diff[fn] = None
333 333
334 334 for fn, e2, flags in m2.iterentries():
335 335 if fn not in self:
336 336 diff[fn] = (None, ''), (e2, flags)
337 337
338 338 return diff
339 339
340 340 def iterentries(self):
341 341 return lazymanifestiterentries(self)
342 342
343 343 def iterkeys(self):
344 344 return lazymanifestiter(self)
345 345
346 346 def __iter__(self):
347 347 return lazymanifestiter(self)
348 348
349 349 def __len__(self):
350 350 return len(self.positions)
351 351
352 352 def filtercopy(self, filterfn):
353 353 # XXX should be optimized
354 354 c = _lazymanifest('')
355 355 for f, n, fl in self.iterentries():
356 356 if filterfn(f):
357 357 c[f] = n, fl
358 358 return c
359 359
360 360 try:
361 361 _lazymanifest = parsers.lazymanifest
362 362 except AttributeError:
363 363 pass
364 364
365 365 class manifestdict(object):
366 366 def __init__(self, data=''):
367 367 self._lm = _lazymanifest(data)
368 368
369 369 def __getitem__(self, key):
370 370 return self._lm[key][0]
371 371
372 372 def find(self, key):
373 373 return self._lm[key]
374 374
375 375 def __len__(self):
376 376 return len(self._lm)
377 377
378 378 def __nonzero__(self):
379 379 # nonzero is covered by the __len__ function, but implementing it here
380 380 # makes it easier for extensions to override.
381 381 return len(self._lm) != 0
382 382
383 383 __bool__ = __nonzero__
384 384
385 385 def __setitem__(self, key, node):
386 386 self._lm[key] = node, self.flags(key, '')
387 387
388 388 def __contains__(self, key):
389 389 if key is None:
390 390 return False
391 391 return key in self._lm
392 392
393 393 def __delitem__(self, key):
394 394 del self._lm[key]
395 395
396 396 def __iter__(self):
397 397 return self._lm.__iter__()
398 398
399 399 def iterkeys(self):
400 400 return self._lm.iterkeys()
401 401
402 402 def keys(self):
403 403 return list(self.iterkeys())
404 404
405 405 def filesnotin(self, m2, match=None):
406 406 '''Set of files in this manifest that are not in the other'''
407 407 if match:
408 408 m1 = self.matches(match)
409 409 m2 = m2.matches(match)
410 410 return m1.filesnotin(m2)
411 411 diff = self.diff(m2)
412 412 files = set(filepath
413 413 for filepath, hashflags in diff.iteritems()
414 414 if hashflags[1][0] is None)
415 415 return files
416 416
417 417 @propertycache
418 418 def _dirs(self):
419 419 return util.dirs(self)
420 420
421 421 def dirs(self):
422 422 return self._dirs
423 423
424 424 def hasdir(self, dir):
425 425 return dir in self._dirs
426 426
427 427 def _filesfastpath(self, match):
428 428 '''Checks whether we can correctly and quickly iterate over matcher
429 429 files instead of over manifest files.'''
430 430 files = match.files()
431 431 return (len(files) < 100 and (match.isexact() or
432 432 (match.prefix() and all(fn in self for fn in files))))
433 433
434 434 def walk(self, match):
435 435 '''Generates matching file names.
436 436
437 437 Equivalent to manifest.matches(match).iterkeys(), but without creating
438 438 an entirely new manifest.
439 439
440 440 It also reports nonexistent files by marking them bad with match.bad().
441 441 '''
442 442 if match.always():
443 443 for f in iter(self):
444 444 yield f
445 445 return
446 446
447 447 fset = set(match.files())
448 448
449 449 # avoid the entire walk if we're only looking for specific files
450 450 if self._filesfastpath(match):
451 451 for fn in sorted(fset):
452 452 yield fn
453 453 return
454 454
455 455 for fn in self:
456 456 if fn in fset:
457 457 # specified pattern is the exact name
458 458 fset.remove(fn)
459 459 if match(fn):
460 460 yield fn
461 461
462 462 # for dirstate.walk, files=['.'] means "walk the whole tree".
463 463 # follow that here, too
464 464 fset.discard('.')
465 465
466 466 for fn in sorted(fset):
467 467 if not self.hasdir(fn):
468 468 match.bad(fn, None)
469 469
470 470 def matches(self, match):
471 471 '''generate a new manifest filtered by the match argument'''
472 472 if match.always():
473 473 return self.copy()
474 474
475 475 if self._filesfastpath(match):
476 476 m = manifestdict()
477 477 lm = self._lm
478 478 for fn in match.files():
479 479 if fn in lm:
480 480 m._lm[fn] = lm[fn]
481 481 return m
482 482
483 483 m = manifestdict()
484 484 m._lm = self._lm.filtercopy(match)
485 485 return m
486 486
487 487 def diff(self, m2, match=None, clean=False):
488 488 '''Finds changes between the current manifest and m2.
489 489
490 490 Args:
491 491 m2: the manifest to which this manifest should be compared.
492 492 clean: if true, include files unchanged between these manifests
493 493 with a None value in the returned dictionary.
494 494
495 495 The result is returned as a dict with filename as key and
496 496 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
497 497 nodeid in the current/other manifest and fl1/fl2 is the flag
498 498 in the current/other manifest. Where the file does not exist,
499 499 the nodeid will be None and the flags will be the empty
500 500 string.
501 501 '''
502 502 if match:
503 503 m1 = self.matches(match)
504 504 m2 = m2.matches(match)
505 505 return m1.diff(m2, clean=clean)
506 506 return self._lm.diff(m2._lm, clean)
507 507
508 508 def setflag(self, key, flag):
509 509 self._lm[key] = self[key], flag
510 510
511 511 def get(self, key, default=None):
512 512 try:
513 513 return self._lm[key][0]
514 514 except KeyError:
515 515 return default
516 516
517 517 def flags(self, key, default=''):
518 518 try:
519 519 return self._lm[key][1]
520 520 except KeyError:
521 521 return default
522 522
523 523 def copy(self):
524 524 c = manifestdict()
525 525 c._lm = self._lm.copy()
526 526 return c
527 527
528 528 def items(self):
529 529 return (x[:2] for x in self._lm.iterentries())
530 530
531 531 iteritems = items
532 532
533 533 def iterentries(self):
534 534 return self._lm.iterentries()
535 535
536 536 def text(self):
537 537 # most likely uses native version
538 538 return self._lm.text()
539 539
540 540 def fastdelta(self, base, changes):
541 541 """Given a base manifest text as a bytearray and a list of changes
542 542 relative to that text, compute a delta that can be used by revlog.
543 543 """
544 544 delta = []
545 545 dstart = None
546 546 dend = None
547 547 dline = [""]
548 548 start = 0
549 549 # zero copy representation of base as a buffer
550 550 addbuf = util.buffer(base)
551 551
552 552 changes = list(changes)
553 553 if len(changes) < 1000:
554 554 # start with a readonly loop that finds the offset of
555 555 # each line and creates the deltas
556 556 for f, todelete in changes:
557 557 # bs will either be the index of the item or the insert point
558 558 start, end = _msearch(addbuf, f, start)
559 559 if not todelete:
560 560 h, fl = self._lm[f]
561 561 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
562 562 else:
563 563 if start == end:
564 564 # item we want to delete was not found, error out
565 565 raise AssertionError(
566 566 _("failed to remove %s from manifest") % f)
567 567 l = ""
568 568 if dstart is not None and dstart <= start and dend >= start:
569 569 if dend < end:
570 570 dend = end
571 571 if l:
572 572 dline.append(l)
573 573 else:
574 574 if dstart is not None:
575 575 delta.append([dstart, dend, "".join(dline)])
576 576 dstart = start
577 577 dend = end
578 578 dline = [l]
579 579
580 580 if dstart is not None:
581 581 delta.append([dstart, dend, "".join(dline)])
582 582 # apply the delta to the base, and get a delta for addrevision
583 583 deltatext, arraytext = _addlistdelta(base, delta)
584 584 else:
585 585 # For large changes, it's much cheaper to just build the text and
586 586 # diff it.
587 587 arraytext = bytearray(self.text())
588 588 deltatext = mdiff.textdiff(
589 589 util.buffer(base), util.buffer(arraytext))
590 590
591 591 return arraytext, deltatext
592 592
593 593 def _msearch(m, s, lo=0, hi=None):
594 594 '''return a tuple (start, end) that says where to find s within m.
595 595
596 596 If the string is found m[start:end] are the line containing
597 597 that string. If start == end the string was not found and
598 598 they indicate the proper sorted insertion point.
599 599
600 600 m should be a buffer, a memoryview or a byte string.
601 601 s is a byte string'''
602 602 def advance(i, c):
603 603 while i < lenm and m[i:i + 1] != c:
604 604 i += 1
605 605 return i
606 606 if not s:
607 607 return (lo, lo)
608 608 lenm = len(m)
609 609 if not hi:
610 610 hi = lenm
611 611 while lo < hi:
612 612 mid = (lo + hi) // 2
613 613 start = mid
614 614 while start > 0 and m[start - 1:start] != '\n':
615 615 start -= 1
616 616 end = advance(start, '\0')
617 617 if bytes(m[start:end]) < s:
618 618 # we know that after the null there are 40 bytes of sha1
619 619 # this translates to the bisect lo = mid + 1
620 620 lo = advance(end + 40, '\n') + 1
621 621 else:
622 622 # this translates to the bisect hi = mid
623 623 hi = start
624 624 end = advance(lo, '\0')
625 625 found = m[lo:end]
626 626 if s == found:
627 627 # we know that after the null there are 40 bytes of sha1
628 628 end = advance(end + 40, '\n')
629 629 return (lo, end + 1)
630 630 else:
631 631 return (lo, lo)
632 632
633 633 def _checkforbidden(l):
634 634 """Check filenames for illegal characters."""
635 635 for f in l:
636 636 if '\n' in f or '\r' in f:
637 637 raise error.RevlogError(
638 638 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
639 639
640 640
641 641 # apply the changes collected during the bisect loop to our addlist
642 642 # return a delta suitable for addrevision
643 643 def _addlistdelta(addlist, x):
644 644 # for large addlist arrays, building a new array is cheaper
645 645 # than repeatedly modifying the existing one
646 646 currentposition = 0
647 647 newaddlist = bytearray()
648 648
649 649 for start, end, content in x:
650 650 newaddlist += addlist[currentposition:start]
651 651 if content:
652 652 newaddlist += bytearray(content)
653 653
654 654 currentposition = end
655 655
656 656 newaddlist += addlist[currentposition:]
657 657
658 658 deltatext = "".join(struct.pack(">lll", start, end, len(content))
659 659 + content for start, end, content in x)
660 660 return deltatext, newaddlist
661 661
662 662 def _splittopdir(f):
663 663 if '/' in f:
664 664 dir, subpath = f.split('/', 1)
665 665 return dir + '/', subpath
666 666 else:
667 667 return '', f
668 668
669 669 _noop = lambda s: None
670 670
671 671 class treemanifest(object):
672 672 def __init__(self, dir='', text=''):
673 673 self._dir = dir
674 674 self._node = revlog.nullid
675 675 self._loadfunc = _noop
676 676 self._copyfunc = _noop
677 677 self._dirty = False
678 678 self._dirs = {}
679 679 # Using _lazymanifest here is a little slower than plain old dicts
680 680 self._files = {}
681 681 self._flags = {}
682 682 if text:
683 683 def readsubtree(subdir, subm):
684 684 raise AssertionError('treemanifest constructor only accepts '
685 685 'flat manifests')
686 686 self.parse(text, readsubtree)
687 687 self._dirty = True # Mark flat manifest dirty after parsing
688 688
689 689 def _subpath(self, path):
690 690 return self._dir + path
691 691
692 692 def __len__(self):
693 693 self._load()
694 694 size = len(self._files)
695 695 for m in self._dirs.values():
696 696 size += m.__len__()
697 697 return size
698 698
699 699 def __nonzero__(self):
700 700 # Faster than "__len() != 0" since it avoids loading sub-manifests
701 701 return not self._isempty()
702 702
703 703 __bool__ = __nonzero__
704 704
705 705 def _isempty(self):
706 706 self._load() # for consistency; already loaded by all callers
707 707 return (not self._files and (not self._dirs or
708 708 all(m._isempty() for m in self._dirs.values())))
709 709
710 710 def __repr__(self):
711 711 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
712 712 (self._dir, revlog.hex(self._node),
713 713 bool(self._loadfunc is _noop),
714 714 self._dirty, id(self)))
715 715
716 716 def dir(self):
717 717 '''The directory that this tree manifest represents, including a
718 718 trailing '/'. Empty string for the repo root directory.'''
719 719 return self._dir
720 720
721 721 def node(self):
722 722 '''This node of this instance. nullid for unsaved instances. Should
723 723 be updated when the instance is read or written from a revlog.
724 724 '''
725 725 assert not self._dirty
726 726 return self._node
727 727
728 728 def setnode(self, node):
729 729 self._node = node
730 730 self._dirty = False
731 731
732 732 def iterentries(self):
733 733 self._load()
734 734 for p, n in sorted(itertools.chain(self._dirs.items(),
735 735 self._files.items())):
736 736 if p in self._files:
737 737 yield self._subpath(p), n, self._flags.get(p, '')
738 738 else:
739 739 for x in n.iterentries():
740 740 yield x
741 741
742 742 def items(self):
743 743 self._load()
744 744 for p, n in sorted(itertools.chain(self._dirs.items(),
745 745 self._files.items())):
746 746 if p in self._files:
747 747 yield self._subpath(p), n
748 748 else:
749 749 for f, sn in n.iteritems():
750 750 yield f, sn
751 751
752 752 iteritems = items
753 753
754 754 def iterkeys(self):
755 755 self._load()
756 756 for p in sorted(itertools.chain(self._dirs, self._files)):
757 757 if p in self._files:
758 758 yield self._subpath(p)
759 759 else:
760 760 for f in self._dirs[p]:
761 761 yield f
762 762
763 763 def keys(self):
764 764 return list(self.iterkeys())
765 765
766 766 def __iter__(self):
767 767 return self.iterkeys()
768 768
769 769 def __contains__(self, f):
770 770 if f is None:
771 771 return False
772 772 self._load()
773 773 dir, subpath = _splittopdir(f)
774 774 if dir:
775 775 if dir not in self._dirs:
776 776 return False
777 777 return self._dirs[dir].__contains__(subpath)
778 778 else:
779 779 return f in self._files
780 780
781 781 def get(self, f, default=None):
782 782 self._load()
783 783 dir, subpath = _splittopdir(f)
784 784 if dir:
785 785 if dir not in self._dirs:
786 786 return default
787 787 return self._dirs[dir].get(subpath, default)
788 788 else:
789 789 return self._files.get(f, default)
790 790
791 791 def __getitem__(self, f):
792 792 self._load()
793 793 dir, subpath = _splittopdir(f)
794 794 if dir:
795 795 return self._dirs[dir].__getitem__(subpath)
796 796 else:
797 797 return self._files[f]
798 798
799 799 def flags(self, f):
800 800 self._load()
801 801 dir, subpath = _splittopdir(f)
802 802 if dir:
803 803 if dir not in self._dirs:
804 804 return ''
805 805 return self._dirs[dir].flags(subpath)
806 806 else:
807 807 if f in self._dirs:
808 808 return ''
809 809 return self._flags.get(f, '')
810 810
811 811 def find(self, f):
812 812 self._load()
813 813 dir, subpath = _splittopdir(f)
814 814 if dir:
815 815 return self._dirs[dir].find(subpath)
816 816 else:
817 817 return self._files[f], self._flags.get(f, '')
818 818
819 819 def __delitem__(self, f):
820 820 self._load()
821 821 dir, subpath = _splittopdir(f)
822 822 if dir:
823 823 self._dirs[dir].__delitem__(subpath)
824 824 # If the directory is now empty, remove it
825 825 if self._dirs[dir]._isempty():
826 826 del self._dirs[dir]
827 827 else:
828 828 del self._files[f]
829 829 if f in self._flags:
830 830 del self._flags[f]
831 831 self._dirty = True
832 832
833 833 def __setitem__(self, f, n):
834 834 assert n is not None
835 835 self._load()
836 836 dir, subpath = _splittopdir(f)
837 837 if dir:
838 838 if dir not in self._dirs:
839 839 self._dirs[dir] = treemanifest(self._subpath(dir))
840 840 self._dirs[dir].__setitem__(subpath, n)
841 841 else:
842 842 self._files[f] = n[:21] # to match manifestdict's behavior
843 843 self._dirty = True
844 844
845 845 def _load(self):
846 846 if self._loadfunc is not _noop:
847 847 lf, self._loadfunc = self._loadfunc, _noop
848 848 lf(self)
849 849 elif self._copyfunc is not _noop:
850 850 cf, self._copyfunc = self._copyfunc, _noop
851 851 cf(self)
852 852
853 853 def setflag(self, f, flags):
854 854 """Set the flags (symlink, executable) for path f."""
855 855 self._load()
856 856 dir, subpath = _splittopdir(f)
857 857 if dir:
858 858 if dir not in self._dirs:
859 859 self._dirs[dir] = treemanifest(self._subpath(dir))
860 860 self._dirs[dir].setflag(subpath, flags)
861 861 else:
862 862 self._flags[f] = flags
863 863 self._dirty = True
864 864
865 865 def copy(self):
866 866 copy = treemanifest(self._dir)
867 867 copy._node = self._node
868 868 copy._dirty = self._dirty
869 869 if self._copyfunc is _noop:
870 870 def _copyfunc(s):
871 871 self._load()
872 872 for d in self._dirs:
873 873 s._dirs[d] = self._dirs[d].copy()
874 874 s._files = dict.copy(self._files)
875 875 s._flags = dict.copy(self._flags)
876 876 if self._loadfunc is _noop:
877 877 _copyfunc(copy)
878 878 else:
879 879 copy._copyfunc = _copyfunc
880 880 else:
881 881 copy._copyfunc = self._copyfunc
882 882 return copy
883 883
884 884 def filesnotin(self, m2, match=None):
885 885 '''Set of files in this manifest that are not in the other'''
886 886 if match:
887 887 m1 = self.matches(match)
888 888 m2 = m2.matches(match)
889 889 return m1.filesnotin(m2)
890 890
891 891 files = set()
892 892 def _filesnotin(t1, t2):
893 893 if t1._node == t2._node and not t1._dirty and not t2._dirty:
894 894 return
895 895 t1._load()
896 896 t2._load()
897 897 for d, m1 in t1._dirs.iteritems():
898 898 if d in t2._dirs:
899 899 m2 = t2._dirs[d]
900 900 _filesnotin(m1, m2)
901 901 else:
902 902 files.update(m1.iterkeys())
903 903
904 904 for fn in t1._files:
905 905 if fn not in t2._files:
906 906 files.add(t1._subpath(fn))
907 907
908 908 _filesnotin(self, m2)
909 909 return files
910 910
911 911 @propertycache
912 912 def _alldirs(self):
913 913 return util.dirs(self)
914 914
915 915 def dirs(self):
916 916 return self._alldirs
917 917
918 918 def hasdir(self, dir):
919 919 self._load()
920 920 topdir, subdir = _splittopdir(dir)
921 921 if topdir:
922 922 if topdir in self._dirs:
923 923 return self._dirs[topdir].hasdir(subdir)
924 924 return False
925 925 return (dir + '/') in self._dirs
926 926
927 927 def walk(self, match):
928 928 '''Generates matching file names.
929 929
930 930 Equivalent to manifest.matches(match).iterkeys(), but without creating
931 931 an entirely new manifest.
932 932
933 933 It also reports nonexistent files by marking them bad with match.bad().
934 934 '''
935 935 if match.always():
936 936 for f in iter(self):
937 937 yield f
938 938 return
939 939
940 940 fset = set(match.files())
941 941
942 942 for fn in self._walk(match):
943 943 if fn in fset:
944 944 # specified pattern is the exact name
945 945 fset.remove(fn)
946 946 yield fn
947 947
948 948 # for dirstate.walk, files=['.'] means "walk the whole tree".
949 949 # follow that here, too
950 950 fset.discard('.')
951 951
952 952 for fn in sorted(fset):
953 953 if not self.hasdir(fn):
954 954 match.bad(fn, None)
955 955
956 956 def _walk(self, match):
957 957 '''Recursively generates matching file names for walk().'''
958 958 if not match.visitdir(self._dir[:-1] or '.'):
959 959 return
960 960
961 961 # yield this dir's files and walk its submanifests
962 962 self._load()
963 963 for p in sorted(list(self._dirs) + list(self._files)):
964 964 if p in self._files:
965 965 fullp = self._subpath(p)
966 966 if match(fullp):
967 967 yield fullp
968 968 else:
969 969 for f in self._dirs[p]._walk(match):
970 970 yield f
971 971
972 972 def matches(self, match):
973 973 '''generate a new manifest filtered by the match argument'''
974 974 if match.always():
975 975 return self.copy()
976 976
977 977 return self._matches(match)
978 978
979 979 def _matches(self, match):
980 980 '''recursively generate a new manifest filtered by the match argument.
981 981 '''
982 982
983 983 visit = match.visitdir(self._dir[:-1] or '.')
984 984 if visit == 'all':
985 985 return self.copy()
986 986 ret = treemanifest(self._dir)
987 987 if not visit:
988 988 return ret
989 989
990 990 self._load()
991 991 for fn in self._files:
992 992 fullp = self._subpath(fn)
993 993 if not match(fullp):
994 994 continue
995 995 ret._files[fn] = self._files[fn]
996 996 if fn in self._flags:
997 997 ret._flags[fn] = self._flags[fn]
998 998
999 999 for dir, subm in self._dirs.iteritems():
1000 1000 m = subm._matches(match)
1001 1001 if not m._isempty():
1002 1002 ret._dirs[dir] = m
1003 1003
1004 1004 if not ret._isempty():
1005 1005 ret._dirty = True
1006 1006 return ret
1007 1007
1008 1008 def diff(self, m2, match=None, clean=False):
1009 1009 '''Finds changes between the current manifest and m2.
1010 1010
1011 1011 Args:
1012 1012 m2: the manifest to which this manifest should be compared.
1013 1013 clean: if true, include files unchanged between these manifests
1014 1014 with a None value in the returned dictionary.
1015 1015
1016 1016 The result is returned as a dict with filename as key and
1017 1017 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1018 1018 nodeid in the current/other manifest and fl1/fl2 is the flag
1019 1019 in the current/other manifest. Where the file does not exist,
1020 1020 the nodeid will be None and the flags will be the empty
1021 1021 string.
1022 1022 '''
1023 1023 if match:
1024 1024 m1 = self.matches(match)
1025 1025 m2 = m2.matches(match)
1026 1026 return m1.diff(m2, clean=clean)
1027 1027 result = {}
1028 1028 emptytree = treemanifest()
1029 1029 def _diff(t1, t2):
1030 1030 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1031 1031 return
1032 1032 t1._load()
1033 1033 t2._load()
1034 1034 for d, m1 in t1._dirs.iteritems():
1035 1035 m2 = t2._dirs.get(d, emptytree)
1036 1036 _diff(m1, m2)
1037 1037
1038 1038 for d, m2 in t2._dirs.iteritems():
1039 1039 if d not in t1._dirs:
1040 1040 _diff(emptytree, m2)
1041 1041
1042 1042 for fn, n1 in t1._files.iteritems():
1043 1043 fl1 = t1._flags.get(fn, '')
1044 1044 n2 = t2._files.get(fn, None)
1045 1045 fl2 = t2._flags.get(fn, '')
1046 1046 if n1 != n2 or fl1 != fl2:
1047 1047 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1048 1048 elif clean:
1049 1049 result[t1._subpath(fn)] = None
1050 1050
1051 1051 for fn, n2 in t2._files.iteritems():
1052 1052 if fn not in t1._files:
1053 1053 fl2 = t2._flags.get(fn, '')
1054 1054 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
1055 1055
1056 1056 _diff(self, m2)
1057 1057 return result
1058 1058
1059 1059 def unmodifiedsince(self, m2):
1060 1060 return not self._dirty and not m2._dirty and self._node == m2._node
1061 1061
1062 1062 def parse(self, text, readsubtree):
1063 1063 for f, n, fl in _parse(text):
1064 1064 if fl == 't':
1065 1065 f = f + '/'
1066 1066 self._dirs[f] = readsubtree(self._subpath(f), n)
1067 1067 elif '/' in f:
1068 1068 # This is a flat manifest, so use __setitem__ and setflag rather
1069 1069 # than assigning directly to _files and _flags, so we can
1070 1070 # assign a path in a subdirectory, and to mark dirty (compared
1071 1071 # to nullid).
1072 1072 self[f] = n
1073 1073 if fl:
1074 1074 self.setflag(f, fl)
1075 1075 else:
1076 1076 # Assigning to _files and _flags avoids marking as dirty,
1077 1077 # and should be a little faster.
1078 1078 self._files[f] = n
1079 1079 if fl:
1080 1080 self._flags[f] = fl
1081 1081
1082 1082 def text(self):
1083 1083 """Get the full data of this manifest as a bytestring."""
1084 1084 self._load()
1085 1085 return _text(self.iterentries())
1086 1086
1087 1087 def dirtext(self):
1088 1088 """Get the full data of this directory as a bytestring. Make sure that
1089 1089 any submanifests have been written first, so their nodeids are correct.
1090 1090 """
1091 1091 self._load()
1092 1092 flags = self.flags
1093 1093 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1094 1094 files = [(f, self._files[f], flags(f)) for f in self._files]
1095 1095 return _text(sorted(dirs + files))
1096 1096
1097 1097 def read(self, gettext, readsubtree):
1098 1098 def _load_for_read(s):
1099 1099 s.parse(gettext(), readsubtree)
1100 1100 s._dirty = False
1101 1101 self._loadfunc = _load_for_read
1102 1102
1103 1103 def writesubtrees(self, m1, m2, writesubtree):
1104 1104 self._load() # for consistency; should never have any effect here
1105 1105 m1._load()
1106 1106 m2._load()
1107 1107 emptytree = treemanifest()
1108 1108 for d, subm in self._dirs.iteritems():
1109 1109 subp1 = m1._dirs.get(d, emptytree)._node
1110 1110 subp2 = m2._dirs.get(d, emptytree)._node
1111 1111 if subp1 == revlog.nullid:
1112 1112 subp1, subp2 = subp2, subp1
1113 1113 writesubtree(subm, subp1, subp2)
1114 1114
1115 1115 def walksubtrees(self, matcher=None):
1116 1116 """Returns an iterator of the subtrees of this manifest, including this
1117 1117 manifest itself.
1118 1118
1119 1119 If `matcher` is provided, it only returns subtrees that match.
1120 1120 """
1121 1121 if matcher and not matcher.visitdir(self._dir[:-1] or '.'):
1122 1122 return
1123 1123 if not matcher or matcher(self._dir[:-1]):
1124 1124 yield self
1125 1125
1126 1126 self._load()
1127 1127 for d, subm in self._dirs.iteritems():
1128 1128 for subtree in subm.walksubtrees(matcher=matcher):
1129 1129 yield subtree
1130 1130
1131 1131 class manifestrevlog(revlog.revlog):
1132 1132 '''A revlog that stores manifest texts. This is responsible for caching the
1133 1133 full-text manifest contents.
1134 1134 '''
1135 1135 def __init__(self, opener, dir='', dirlogcache=None, indexfile=None,
1136 1136 treemanifest=False):
1137 1137 """Constructs a new manifest revlog
1138 1138
1139 1139 `indexfile` - used by extensions to have two manifests at once, like
1140 1140 when transitioning between flatmanifeset and treemanifests.
1141 1141
1142 1142 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1143 1143 options can also be used to make this a tree manifest revlog. The opener
1144 1144 option takes precedence, so if it is set to True, we ignore whatever
1145 1145 value is passed in to the constructor.
1146 1146 """
1147 1147 # During normal operations, we expect to deal with not more than four
1148 1148 # revs at a time (such as during commit --amend). When rebasing large
1149 1149 # stacks of commits, the number can go up, hence the config knob below.
1150 1150 cachesize = 4
1151 1151 optiontreemanifest = False
1152 1152 opts = getattr(opener, 'options', None)
1153 1153 if opts is not None:
1154 1154 cachesize = opts.get('manifestcachesize', cachesize)
1155 1155 optiontreemanifest = opts.get('treemanifest', False)
1156 1156
1157 1157 self._treeondisk = optiontreemanifest or treemanifest
1158 1158
1159 1159 self._fulltextcache = util.lrucachedict(cachesize)
1160 1160
1161 1161 if dir:
1162 1162 assert self._treeondisk, 'opts is %r' % opts
1163 1163
1164 1164 if indexfile is None:
1165 1165 indexfile = '00manifest.i'
1166 1166 if dir:
1167 1167 indexfile = "meta/" + dir + indexfile
1168 1168
1169 1169 self._dir = dir
1170 1170 # The dirlogcache is kept on the root manifest log
1171 1171 if dir:
1172 1172 self._dirlogcache = dirlogcache
1173 1173 else:
1174 1174 self._dirlogcache = {'': self}
1175 1175
1176 1176 super(manifestrevlog, self).__init__(opener, indexfile,
1177 1177 # only root indexfile is cached
1178 1178 checkambig=not bool(dir),
1179 1179 mmaplargeindex=True)
1180 1180
1181 1181 @property
1182 1182 def fulltextcache(self):
1183 1183 return self._fulltextcache
1184 1184
1185 1185 def clearcaches(self):
1186 1186 super(manifestrevlog, self).clearcaches()
1187 1187 self._fulltextcache.clear()
1188 1188 self._dirlogcache = {'': self}
1189 1189
1190 1190 def dirlog(self, d):
1191 1191 if d:
1192 1192 assert self._treeondisk
1193 1193 if d not in self._dirlogcache:
1194 1194 mfrevlog = manifestrevlog(self.opener, d,
1195 1195 self._dirlogcache,
1196 1196 treemanifest=self._treeondisk)
1197 1197 self._dirlogcache[d] = mfrevlog
1198 1198 return self._dirlogcache[d]
1199 1199
1200 1200 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
1201 1201 if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
1202 1202 # If our first parent is in the manifest cache, we can
1203 1203 # compute a delta here using properties we know about the
1204 1204 # manifest up-front, which may save time later for the
1205 1205 # revlog layer.
1206 1206
1207 1207 _checkforbidden(added)
1208 1208 # combine the changed lists into one sorted iterator
1209 1209 work = heapq.merge([(x, False) for x in added],
1210 1210 [(x, True) for x in removed])
1211 1211
1212 1212 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1213 1213 cachedelta = self.rev(p1), deltatext
1214 1214 text = util.buffer(arraytext)
1215 1215 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
1216 1216 else:
1217 1217 # The first parent manifest isn't already loaded, so we'll
1218 1218 # just encode a fulltext of the manifest and pass that
1219 1219 # through to the revlog layer, and let it handle the delta
1220 1220 # process.
1221 1221 if self._treeondisk:
1222 1222 assert readtree, "readtree must be set for treemanifest writes"
1223 1223 m1 = readtree(self._dir, p1)
1224 1224 m2 = readtree(self._dir, p2)
1225 1225 n = self._addtree(m, transaction, link, m1, m2, readtree)
1226 1226 arraytext = None
1227 1227 else:
1228 1228 text = m.text()
1229 1229 n = self.addrevision(text, transaction, link, p1, p2)
1230 1230 arraytext = bytearray(text)
1231 1231
1232 1232 if arraytext is not None:
1233 1233 self.fulltextcache[n] = arraytext
1234 1234
1235 1235 return n
1236 1236
1237 1237 def _addtree(self, m, transaction, link, m1, m2, readtree):
1238 1238 # If the manifest is unchanged compared to one parent,
1239 1239 # don't write a new revision
1240 1240 if self._dir != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(m2)):
1241 1241 return m.node()
1242 1242 def writesubtree(subm, subp1, subp2):
1243 1243 sublog = self.dirlog(subm.dir())
1244 1244 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1245 1245 readtree=readtree)
1246 1246 m.writesubtrees(m1, m2, writesubtree)
1247 1247 text = m.dirtext()
1248 1248 n = None
1249 1249 if self._dir != '':
1250 1250 # Double-check whether contents are unchanged to one parent
1251 1251 if text == m1.dirtext():
1252 1252 n = m1.node()
1253 1253 elif text == m2.dirtext():
1254 1254 n = m2.node()
1255 1255
1256 1256 if not n:
1257 1257 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1258 1258
1259 1259 # Save nodeid so parent manifest can calculate its nodeid
1260 1260 m.setnode(n)
1261 1261 return n
1262 1262
1263 1263 class manifestlog(object):
1264 1264 """A collection class representing the collection of manifest snapshots
1265 1265 referenced by commits in the repository.
1266 1266
1267 1267 In this situation, 'manifest' refers to the abstract concept of a snapshot
1268 1268 of the list of files in the given commit. Consumers of the output of this
1269 1269 class do not care about the implementation details of the actual manifests
1270 1270 they receive (i.e. tree or flat or lazily loaded, etc)."""
1271 1271 def __init__(self, opener, repo):
1272 1272 usetreemanifest = False
1273 1273 cachesize = 4
1274 1274
1275 1275 opts = getattr(opener, 'options', None)
1276 1276 if opts is not None:
1277 1277 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1278 1278 cachesize = opts.get('manifestcachesize', cachesize)
1279 1279 self._treeinmem = usetreemanifest
1280 1280
1281 1281 self._revlog = repo._constructmanifest()
1282 self._narrowmatch = repo.narrowmatch()
1282 1283
1283 1284 # A cache of the manifestctx or treemanifestctx for each directory
1284 1285 self._dirmancache = {}
1285 1286 self._dirmancache[''] = util.lrucachedict(cachesize)
1286 1287
1287 1288 self.cachesize = cachesize
1288 1289
1289 1290 def __getitem__(self, node):
1290 1291 """Retrieves the manifest instance for the given node. Throws a
1291 1292 LookupError if not found.
1292 1293 """
1293 1294 return self.get('', node)
1294 1295
1295 1296 def get(self, dir, node, verify=True):
1296 1297 """Retrieves the manifest instance for the given node. Throws a
1297 1298 LookupError if not found.
1298 1299
1299 1300 `verify` - if True an exception will be thrown if the node is not in
1300 1301 the revlog
1301 1302 """
1302 1303 if node in self._dirmancache.get(dir, ()):
1303 1304 return self._dirmancache[dir][node]
1304 1305
1305 1306 if dir:
1306 1307 if self._revlog._treeondisk:
1307 1308 if verify:
1308 1309 dirlog = self._revlog.dirlog(dir)
1309 1310 if node not in dirlog.nodemap:
1310 1311 raise LookupError(node, dirlog.indexfile,
1311 1312 _('no node'))
1312 1313 m = treemanifestctx(self, dir, node)
1313 1314 else:
1314 1315 raise error.Abort(
1315 1316 _("cannot ask for manifest directory '%s' in a flat "
1316 1317 "manifest") % dir)
1317 1318 else:
1318 1319 if verify:
1319 1320 if node not in self._revlog.nodemap:
1320 1321 raise LookupError(node, self._revlog.indexfile,
1321 1322 _('no node'))
1322 1323 if self._treeinmem:
1323 1324 m = treemanifestctx(self, '', node)
1324 1325 else:
1325 1326 m = manifestctx(self, node)
1326 1327
1327 1328 if node != revlog.nullid:
1328 1329 mancache = self._dirmancache.get(dir)
1329 1330 if not mancache:
1330 1331 mancache = util.lrucachedict(self.cachesize)
1331 1332 self._dirmancache[dir] = mancache
1332 1333 mancache[node] = m
1333 1334 return m
1334 1335
1335 1336 def clearcaches(self):
1336 1337 self._dirmancache.clear()
1337 1338 self._revlog.clearcaches()
1338 1339
1339 1340 class memmanifestctx(object):
1340 1341 def __init__(self, manifestlog):
1341 1342 self._manifestlog = manifestlog
1342 1343 self._manifestdict = manifestdict()
1343 1344
1344 1345 def _revlog(self):
1345 1346 return self._manifestlog._revlog
1346 1347
1347 1348 def new(self):
1348 1349 return memmanifestctx(self._manifestlog)
1349 1350
1350 1351 def copy(self):
1351 1352 memmf = memmanifestctx(self._manifestlog)
1352 1353 memmf._manifestdict = self.read().copy()
1353 1354 return memmf
1354 1355
1355 1356 def read(self):
1356 1357 return self._manifestdict
1357 1358
1358 1359 def write(self, transaction, link, p1, p2, added, removed):
1359 1360 return self._revlog().add(self._manifestdict, transaction, link, p1, p2,
1360 1361 added, removed)
1361 1362
1362 1363 class manifestctx(object):
1363 1364 """A class representing a single revision of a manifest, including its
1364 1365 contents, its parent revs, and its linkrev.
1365 1366 """
1366 1367 def __init__(self, manifestlog, node):
1367 1368 self._manifestlog = manifestlog
1368 1369 self._data = None
1369 1370
1370 1371 self._node = node
1371 1372
1372 1373 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1373 1374 # but let's add it later when something needs it and we can load it
1374 1375 # lazily.
1375 1376 #self.p1, self.p2 = revlog.parents(node)
1376 1377 #rev = revlog.rev(node)
1377 1378 #self.linkrev = revlog.linkrev(rev)
1378 1379
1379 1380 def _revlog(self):
1380 1381 return self._manifestlog._revlog
1381 1382
1382 1383 def node(self):
1383 1384 return self._node
1384 1385
1385 1386 def new(self):
1386 1387 return memmanifestctx(self._manifestlog)
1387 1388
1388 1389 def copy(self):
1389 1390 memmf = memmanifestctx(self._manifestlog)
1390 1391 memmf._manifestdict = self.read().copy()
1391 1392 return memmf
1392 1393
1393 1394 @propertycache
1394 1395 def parents(self):
1395 1396 return self._revlog().parents(self._node)
1396 1397
1397 1398 def read(self):
1398 1399 if self._data is None:
1399 1400 if self._node == revlog.nullid:
1400 1401 self._data = manifestdict()
1401 1402 else:
1402 1403 rl = self._revlog()
1403 1404 text = rl.revision(self._node)
1404 1405 arraytext = bytearray(text)
1405 1406 rl._fulltextcache[self._node] = arraytext
1406 1407 self._data = manifestdict(text)
1407 1408 return self._data
1408 1409
1409 1410 def readfast(self, shallow=False):
1410 1411 '''Calls either readdelta or read, based on which would be less work.
1411 1412 readdelta is called if the delta is against the p1, and therefore can be
1412 1413 read quickly.
1413 1414
1414 1415 If `shallow` is True, nothing changes since this is a flat manifest.
1415 1416 '''
1416 1417 rl = self._revlog()
1417 1418 r = rl.rev(self._node)
1418 1419 deltaparent = rl.deltaparent(r)
1419 1420 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1420 1421 return self.readdelta()
1421 1422 return self.read()
1422 1423
1423 1424 def readdelta(self, shallow=False):
1424 1425 '''Returns a manifest containing just the entries that are present
1425 1426 in this manifest, but not in its p1 manifest. This is efficient to read
1426 1427 if the revlog delta is already p1.
1427 1428
1428 1429 Changing the value of `shallow` has no effect on flat manifests.
1429 1430 '''
1430 1431 revlog = self._revlog()
1431 1432 r = revlog.rev(self._node)
1432 1433 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1433 1434 return manifestdict(d)
1434 1435
1435 1436 def find(self, key):
1436 1437 return self.read().find(key)
1437 1438
1438 1439 class memtreemanifestctx(object):
1439 1440 def __init__(self, manifestlog, dir=''):
1440 1441 self._manifestlog = manifestlog
1441 1442 self._dir = dir
1442 1443 self._treemanifest = treemanifest()
1443 1444
1444 1445 def _revlog(self):
1445 1446 return self._manifestlog._revlog
1446 1447
1447 1448 def new(self, dir=''):
1448 1449 return memtreemanifestctx(self._manifestlog, dir=dir)
1449 1450
1450 1451 def copy(self):
1451 1452 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1452 1453 memmf._treemanifest = self._treemanifest.copy()
1453 1454 return memmf
1454 1455
1455 1456 def read(self):
1456 1457 return self._treemanifest
1457 1458
1458 1459 def write(self, transaction, link, p1, p2, added, removed):
1459 1460 def readtree(dir, node):
1460 1461 return self._manifestlog.get(dir, node).read()
1461 1462 return self._revlog().add(self._treemanifest, transaction, link, p1, p2,
1462 1463 added, removed, readtree=readtree)
1463 1464
1464 1465 class treemanifestctx(object):
1465 1466 def __init__(self, manifestlog, dir, node):
1466 1467 self._manifestlog = manifestlog
1467 1468 self._dir = dir
1468 1469 self._data = None
1469 1470
1470 1471 self._node = node
1471 1472
1472 1473 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1473 1474 # we can instantiate treemanifestctx objects for directories we don't
1474 1475 # have on disk.
1475 1476 #self.p1, self.p2 = revlog.parents(node)
1476 1477 #rev = revlog.rev(node)
1477 1478 #self.linkrev = revlog.linkrev(rev)
1478 1479
1479 1480 def _revlog(self):
1481 narrowmatch = self._manifestlog._narrowmatch
1482 if not narrowmatch.always():
1483 if not narrowmatch.visitdir(self._dir[:-1] or '.'):
1484 return excludedmanifestrevlog(self._dir)
1480 1485 return self._manifestlog._revlog.dirlog(self._dir)
1481 1486
1482 1487 def read(self):
1483 1488 if self._data is None:
1484 1489 rl = self._revlog()
1485 1490 if self._node == revlog.nullid:
1486 1491 self._data = treemanifest()
1487 1492 elif rl._treeondisk:
1488 1493 m = treemanifest(dir=self._dir)
1489 1494 def gettext():
1490 1495 return rl.revision(self._node)
1491 1496 def readsubtree(dir, subm):
1492 1497 # Set verify to False since we need to be able to create
1493 1498 # subtrees for trees that don't exist on disk.
1494 1499 return self._manifestlog.get(dir, subm, verify=False).read()
1495 1500 m.read(gettext, readsubtree)
1496 1501 m.setnode(self._node)
1497 1502 self._data = m
1498 1503 else:
1499 1504 text = rl.revision(self._node)
1500 1505 arraytext = bytearray(text)
1501 1506 rl.fulltextcache[self._node] = arraytext
1502 1507 self._data = treemanifest(dir=self._dir, text=text)
1503 1508
1504 1509 return self._data
1505 1510
1506 1511 def node(self):
1507 1512 return self._node
1508 1513
1509 1514 def new(self, dir=''):
1510 1515 return memtreemanifestctx(self._manifestlog, dir=dir)
1511 1516
1512 1517 def copy(self):
1513 1518 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1514 1519 memmf._treemanifest = self.read().copy()
1515 1520 return memmf
1516 1521
1517 1522 @propertycache
1518 1523 def parents(self):
1519 1524 return self._revlog().parents(self._node)
1520 1525
1521 1526 def readdelta(self, shallow=False):
1522 1527 '''Returns a manifest containing just the entries that are present
1523 1528 in this manifest, but not in its p1 manifest. This is efficient to read
1524 1529 if the revlog delta is already p1.
1525 1530
1526 1531 If `shallow` is True, this will read the delta for this directory,
1527 1532 without recursively reading subdirectory manifests. Instead, any
1528 1533 subdirectory entry will be reported as it appears in the manifest, i.e.
1529 1534 the subdirectory will be reported among files and distinguished only by
1530 1535 its 't' flag.
1531 1536 '''
1532 1537 revlog = self._revlog()
1533 1538 if shallow:
1534 1539 r = revlog.rev(self._node)
1535 1540 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1536 1541 return manifestdict(d)
1537 1542 else:
1538 1543 # Need to perform a slow delta
1539 1544 r0 = revlog.deltaparent(revlog.rev(self._node))
1540 1545 m0 = self._manifestlog.get(self._dir, revlog.node(r0)).read()
1541 1546 m1 = self.read()
1542 1547 md = treemanifest(dir=self._dir)
1543 1548 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1544 1549 if n1:
1545 1550 md[f] = n1
1546 1551 if fl1:
1547 1552 md.setflag(f, fl1)
1548 1553 return md
1549 1554
1550 1555 def readfast(self, shallow=False):
1551 1556 '''Calls either readdelta or read, based on which would be less work.
1552 1557 readdelta is called if the delta is against the p1, and therefore can be
1553 1558 read quickly.
1554 1559
1555 1560 If `shallow` is True, it only returns the entries from this manifest,
1556 1561 and not any submanifests.
1557 1562 '''
1558 1563 rl = self._revlog()
1559 1564 r = rl.rev(self._node)
1560 1565 deltaparent = rl.deltaparent(r)
1561 1566 if (deltaparent != revlog.nullrev and
1562 1567 deltaparent in rl.parentrevs(r)):
1563 1568 return self.readdelta(shallow=shallow)
1564 1569
1565 1570 if shallow:
1566 1571 return manifestdict(rl.revision(self._node))
1567 1572 else:
1568 1573 return self.read()
1569 1574
1570 1575 def find(self, key):
1571 1576 return self.read().find(key)
1572 1577
1573 1578 class excludeddir(treemanifest):
1574 1579 """Stand-in for a directory that is excluded from the repository.
1575 1580
1576 1581 With narrowing active on a repository that uses treemanifests,
1577 1582 some of the directory revlogs will be excluded from the resulting
1578 1583 clone. This is a huge storage win for clients, but means we need
1579 1584 some sort of pseudo-manifest to surface to internals so we can
1580 1585 detect a merge conflict outside the narrowspec. That's what this
1581 1586 class is: it stands in for a directory whose node is known, but
1582 1587 whose contents are unknown.
1583 1588 """
1584 1589 def __init__(self, dir, node):
1585 1590 super(excludeddir, self).__init__(dir)
1586 1591 self._node = node
1587 1592 # Add an empty file, which will be included by iterators and such,
1588 1593 # appearing as the directory itself (i.e. something like "dir/")
1589 1594 self._files[''] = node
1590 1595 self._flags[''] = 't'
1591 1596
1592 1597 # Manifests outside the narrowspec should never be modified, so avoid
1593 1598 # copying. This makes a noticeable difference when there are very many
1594 1599 # directories outside the narrowspec. Also, it makes sense for the copy to
1595 1600 # be of the same type as the original, which would not happen with the
1596 1601 # super type's copy().
1597 1602 def copy(self):
1598 1603 return self
1599 1604
1600 1605 class excludeddirmanifestctx(treemanifestctx):
1601 1606 """context wrapper for excludeddir - see that docstring for rationale"""
1602 1607 def __init__(self, dir, node):
1603 1608 self._dir = dir
1604 1609 self._node = node
1605 1610
1606 1611 def read(self):
1607 1612 return excludeddir(self._dir, self._node)
1608 1613
1609 1614 def write(self, *args):
1610 1615 raise error.ProgrammingError(
1611 1616 'attempt to write manifest from excluded dir %s' % self._dir)
1612 1617
1613 1618 class excludedmanifestrevlog(manifestrevlog):
1614 1619 """Stand-in for excluded treemanifest revlogs.
1615 1620
1616 1621 When narrowing is active on a treemanifest repository, we'll have
1617 1622 references to directories we can't see due to the revlog being
1618 1623 skipped. This class exists to conform to the manifestrevlog
1619 1624 interface for those directories and proactively prevent writes to
1620 1625 outside the narrowspec.
1621 1626 """
1622 1627
1623 1628 def __init__(self, dir):
1624 1629 self._dir = dir
1625 1630
1626 1631 def __len__(self):
1627 1632 raise error.ProgrammingError(
1628 1633 'attempt to get length of excluded dir %s' % self._dir)
1629 1634
1630 1635 def rev(self, node):
1631 1636 raise error.ProgrammingError(
1632 1637 'attempt to get rev from excluded dir %s' % self._dir)
1633 1638
1634 1639 def linkrev(self, node):
1635 1640 raise error.ProgrammingError(
1636 1641 'attempt to get linkrev from excluded dir %s' % self._dir)
1637 1642
1638 1643 def node(self, rev):
1639 1644 raise error.ProgrammingError(
1640 1645 'attempt to get node from excluded dir %s' % self._dir)
1641 1646
1642 1647 def add(self, *args, **kwargs):
1643 1648 # We should never write entries in dirlogs outside the narrow clone.
1644 1649 # However, the method still gets called from writesubtree() in
1645 1650 # _addtree(), so we need to handle it. We should possibly make that
1646 1651 # avoid calling add() with a clean manifest (_dirty is always False
1647 1652 # in excludeddir instances).
1648 1653 pass
General Comments 0
You need to be logged in to leave comments. Login now