##// END OF EJS Templates
manifest: clean up dirlog() to take a d parameter to avoid shadowing dir()...
Augie Fackler -
r36112:59adb305 default
parent child Browse files
Show More
@@ -1,177 +1,177 b''
1 1 # narrowrevlog.py - revlog storing irrelevant nodes as "ellipsis" nodes
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial import (
11 11 error,
12 12 manifest,
13 13 revlog,
14 14 util,
15 15 )
16 16
17 17 def readtransform(self, text):
18 18 return text, False
19 19
20 20 def writetransform(self, text):
21 21 return text, False
22 22
23 23 def rawtransform(self, text):
24 24 return False
25 25
26 26 revlog.addflagprocessor(revlog.REVIDX_ELLIPSIS,
27 27 (readtransform, writetransform, rawtransform))
28 28
29 29 def setup():
30 30 # We just wanted to add the flag processor, which is done at module
31 31 # load time.
32 32 pass
33 33
34 34 class excludeddir(manifest.treemanifest):
35 35 """Stand-in for a directory that is excluded from the repository.
36 36
37 37 With narrowing active on a repository that uses treemanifests,
38 38 some of the directory revlogs will be excluded from the resulting
39 39 clone. This is a huge storage win for clients, but means we need
40 40 some sort of pseudo-manifest to surface to internals so we can
41 41 detect a merge conflict outside the narrowspec. That's what this
42 42 class is: it stands in for a directory whose node is known, but
43 43 whose contents are unknown.
44 44 """
45 45 def __init__(self, dir, node):
46 46 super(excludeddir, self).__init__(dir)
47 47 self._node = node
48 48 # Add an empty file, which will be included by iterators and such,
49 49 # appearing as the directory itself (i.e. something like "dir/")
50 50 self._files[''] = node
51 51 self._flags[''] = 't'
52 52
53 53 # Manifests outside the narrowspec should never be modified, so avoid
54 54 # copying. This makes a noticeable difference when there are very many
55 55 # directories outside the narrowspec. Also, it makes sense for the copy to
56 56 # be of the same type as the original, which would not happen with the
57 57 # super type's copy().
58 58 def copy(self):
59 59 return self
60 60
61 61 class excludeddirmanifestctx(manifest.treemanifestctx):
62 62 """context wrapper for excludeddir - see that docstring for rationale"""
63 63 def __init__(self, dir, node):
64 64 self._dir = dir
65 65 self._node = node
66 66
67 67 def read(self):
68 68 return excludeddir(self._dir, self._node)
69 69
70 70 def write(self, *args):
71 71 raise error.ProgrammingError(
72 72 'attempt to write manifest from excluded dir %s' % self._dir)
73 73
74 74 class excludedmanifestrevlog(manifest.manifestrevlog):
75 75 """Stand-in for excluded treemanifest revlogs.
76 76
77 77 When narrowing is active on a treemanifest repository, we'll have
78 78 references to directories we can't see due to the revlog being
79 79 skipped. This class exists to conform to the manifestrevlog
80 80 interface for those directories and proactively prevent writes to
81 81 outside the narrowspec.
82 82 """
83 83
84 84 def __init__(self, dir):
85 85 self._dir = dir
86 86
87 87 def __len__(self):
88 88 raise error.ProgrammingError(
89 89 'attempt to get length of excluded dir %s' % self._dir)
90 90
91 91 def rev(self, node):
92 92 raise error.ProgrammingError(
93 93 'attempt to get rev from excluded dir %s' % self._dir)
94 94
95 95 def linkrev(self, node):
96 96 raise error.ProgrammingError(
97 97 'attempt to get linkrev from excluded dir %s' % self._dir)
98 98
99 99 def node(self, rev):
100 100 raise error.ProgrammingError(
101 101 'attempt to get node from excluded dir %s' % self._dir)
102 102
103 103 def add(self, *args, **kwargs):
104 104 # We should never write entries in dirlogs outside the narrow clone.
105 105 # However, the method still gets called from writesubtree() in
106 106 # _addtree(), so we need to handle it. We should possibly make that
107 107 # avoid calling add() with a clean manifest (_dirty is always False
108 108 # in excludeddir instances).
109 109 pass
110 110
111 111 def makenarrowmanifestrevlog(mfrevlog, repo):
112 112 if util.safehasattr(mfrevlog, '_narrowed'):
113 113 return
114 114
115 115 class narrowmanifestrevlog(mfrevlog.__class__):
116 116 # This function is called via debug{revlog,index,data}, but also during
117 117 # at least some push operations. This will be used to wrap/exclude the
118 118 # child directories when using treemanifests.
119 def dirlog(self, dir):
120 if dir and not dir.endswith('/'):
121 dir = dir + '/'
122 if not repo.narrowmatch().visitdir(dir[:-1] or '.'):
123 return excludedmanifestrevlog(dir)
124 result = super(narrowmanifestrevlog, self).dirlog(dir)
119 def dirlog(self, d):
120 if d and not d.endswith('/'):
121 d = d + '/'
122 if not repo.narrowmatch().visitdir(d[:-1] or '.'):
123 return excludedmanifestrevlog(d)
124 result = super(narrowmanifestrevlog, self).dirlog(d)
125 125 makenarrowmanifestrevlog(result, repo)
126 126 return result
127 127
128 128 mfrevlog.__class__ = narrowmanifestrevlog
129 129 mfrevlog._narrowed = True
130 130
131 131 def makenarrowmanifestlog(mfl, repo):
132 132 class narrowmanifestlog(mfl.__class__):
133 133 def get(self, dir, node, verify=True):
134 134 if not repo.narrowmatch().visitdir(dir[:-1] or '.'):
135 135 return excludeddirmanifestctx(dir, node)
136 136 return super(narrowmanifestlog, self).get(dir, node, verify=verify)
137 137 mfl.__class__ = narrowmanifestlog
138 138
139 139 def makenarrowfilelog(fl, narrowmatch):
140 140 class narrowfilelog(fl.__class__):
141 141 def renamed(self, node):
142 142 m = super(narrowfilelog, self).renamed(node)
143 143 if m and not narrowmatch(m[0]):
144 144 return None
145 145 return m
146 146
147 147 def size(self, rev):
148 148 # We take advantage of the fact that remotefilelog
149 149 # lacks a node() method to just skip the
150 150 # rename-checking logic when on remotefilelog. This
151 151 # might be incorrect on other non-revlog-based storage
152 152 # engines, but for now this seems to be fine.
153 153 if util.safehasattr(self, 'node'):
154 154 node = self.node(rev)
155 155 # Because renamed() is overridden above to
156 156 # sometimes return None even if there is metadata
157 157 # in the revlog, size can be incorrect for
158 158 # copies/renames, so we need to make sure we call
159 159 # the super class's implementation of renamed()
160 160 # for the purpose of size calculation.
161 161 if super(narrowfilelog, self).renamed(node):
162 162 return len(self.read(node))
163 163 return super(narrowfilelog, self).size(rev)
164 164
165 165 def cmp(self, node, text):
166 166 different = super(narrowfilelog, self).cmp(node, text)
167 167 if different:
168 168 # Similar to size() above, if the file was copied from
169 169 # a file outside the narrowspec, the super class's
170 170 # would have returned True because we tricked it into
171 171 # thinking that the file was not renamed.
172 172 if super(narrowfilelog, self).renamed(node):
173 173 t2 = self.read(node)
174 174 return t2 != text
175 175 return different
176 176
177 177 fl.__class__ = narrowfilelog
@@ -1,1643 +1,1643 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import os
13 13 import struct
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 )
20 20 from . import (
21 21 error,
22 22 mdiff,
23 23 policy,
24 24 revlog,
25 25 util,
26 26 )
27 27
28 28 parsers = policy.importmod(r'parsers')
29 29 propertycache = util.propertycache
30 30
31 31 def _parsev1(data):
32 32 # This method does a little bit of excessive-looking
33 33 # precondition checking. This is so that the behavior of this
34 34 # class exactly matches its C counterpart to try and help
35 35 # prevent surprise breakage for anyone that develops against
36 36 # the pure version.
37 37 if data and data[-1:] != '\n':
38 38 raise ValueError('Manifest did not end in a newline.')
39 39 prev = None
40 40 for l in data.splitlines():
41 41 if prev is not None and prev > l:
42 42 raise ValueError('Manifest lines not in sorted order.')
43 43 prev = l
44 44 f, n = l.split('\0')
45 45 if len(n) > 40:
46 46 yield f, bin(n[:40]), n[40:]
47 47 else:
48 48 yield f, bin(n), ''
49 49
50 50 def _parsev2(data):
51 51 metadataend = data.find('\n')
52 52 # Just ignore metadata for now
53 53 pos = metadataend + 1
54 54 prevf = ''
55 55 while pos < len(data):
56 56 end = data.find('\n', pos + 1) # +1 to skip stem length byte
57 57 if end == -1:
58 58 raise ValueError('Manifest ended with incomplete file entry.')
59 59 stemlen = ord(data[pos:pos + 1])
60 60 items = data[pos + 1:end].split('\0')
61 61 f = prevf[:stemlen] + items[0]
62 62 if prevf > f:
63 63 raise ValueError('Manifest entries not in sorted order.')
64 64 fl = items[1]
65 65 # Just ignore metadata (items[2:] for now)
66 66 n = data[end + 1:end + 21]
67 67 yield f, n, fl
68 68 pos = end + 22
69 69 prevf = f
70 70
71 71 def _parse(data):
72 72 """Generates (path, node, flags) tuples from a manifest text"""
73 73 if data.startswith('\0'):
74 74 return iter(_parsev2(data))
75 75 else:
76 76 return iter(_parsev1(data))
77 77
78 78 def _text(it, usemanifestv2):
79 79 """Given an iterator over (path, node, flags) tuples, returns a manifest
80 80 text"""
81 81 if usemanifestv2:
82 82 return _textv2(it)
83 83 else:
84 84 return _textv1(it)
85 85
86 86 def _textv1(it):
87 87 files = []
88 88 lines = []
89 89 _hex = revlog.hex
90 90 for f, n, fl in it:
91 91 files.append(f)
92 92 # if this is changed to support newlines in filenames,
93 93 # be sure to check the templates/ dir again (especially *-raw.tmpl)
94 94 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
95 95
96 96 _checkforbidden(files)
97 97 return ''.join(lines)
98 98
99 99 def _textv2(it):
100 100 files = []
101 101 lines = ['\0\n']
102 102 prevf = ''
103 103 for f, n, fl in it:
104 104 files.append(f)
105 105 stem = os.path.commonprefix([prevf, f])
106 106 stemlen = min(len(stem), 255)
107 107 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
108 108 prevf = f
109 109 _checkforbidden(files)
110 110 return ''.join(lines)
111 111
112 112 class lazymanifestiter(object):
113 113 def __init__(self, lm):
114 114 self.pos = 0
115 115 self.lm = lm
116 116
117 117 def __iter__(self):
118 118 return self
119 119
120 120 def next(self):
121 121 try:
122 122 data, pos = self.lm._get(self.pos)
123 123 except IndexError:
124 124 raise StopIteration
125 125 if pos == -1:
126 126 self.pos += 1
127 127 return data[0]
128 128 self.pos += 1
129 129 zeropos = data.find('\x00', pos)
130 130 return data[pos:zeropos]
131 131
132 132 __next__ = next
133 133
134 134 class lazymanifestiterentries(object):
135 135 def __init__(self, lm):
136 136 self.lm = lm
137 137 self.pos = 0
138 138
139 139 def __iter__(self):
140 140 return self
141 141
142 142 def next(self):
143 143 try:
144 144 data, pos = self.lm._get(self.pos)
145 145 except IndexError:
146 146 raise StopIteration
147 147 if pos == -1:
148 148 self.pos += 1
149 149 return data
150 150 zeropos = data.find('\x00', pos)
151 151 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
152 152 zeropos + 1, 40)
153 153 flags = self.lm._getflags(data, self.pos, zeropos)
154 154 self.pos += 1
155 155 return (data[pos:zeropos], hashval, flags)
156 156
157 157 __next__ = next
158 158
159 159 def unhexlify(data, extra, pos, length):
160 160 s = bin(data[pos:pos + length])
161 161 if extra:
162 162 s += chr(extra & 0xff)
163 163 return s
164 164
165 165 def _cmp(a, b):
166 166 return (a > b) - (a < b)
167 167
168 168 class _lazymanifest(object):
169 169 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
170 170 if positions is None:
171 171 self.positions = self.findlines(data)
172 172 self.extrainfo = [0] * len(self.positions)
173 173 self.data = data
174 174 self.extradata = []
175 175 else:
176 176 self.positions = positions[:]
177 177 self.extrainfo = extrainfo[:]
178 178 self.extradata = extradata[:]
179 179 self.data = data
180 180
181 181 def findlines(self, data):
182 182 if not data:
183 183 return []
184 184 pos = data.find("\n")
185 185 if pos == -1 or data[-1:] != '\n':
186 186 raise ValueError("Manifest did not end in a newline.")
187 187 positions = [0]
188 188 prev = data[:data.find('\x00')]
189 189 while pos < len(data) - 1 and pos != -1:
190 190 positions.append(pos + 1)
191 191 nexts = data[pos + 1:data.find('\x00', pos + 1)]
192 192 if nexts < prev:
193 193 raise ValueError("Manifest lines not in sorted order.")
194 194 prev = nexts
195 195 pos = data.find("\n", pos + 1)
196 196 return positions
197 197
198 198 def _get(self, index):
199 199 # get the position encoded in pos:
200 200 # positive number is an index in 'data'
201 201 # negative number is in extrapieces
202 202 pos = self.positions[index]
203 203 if pos >= 0:
204 204 return self.data, pos
205 205 return self.extradata[-pos - 1], -1
206 206
207 207 def _getkey(self, pos):
208 208 if pos >= 0:
209 209 return self.data[pos:self.data.find('\x00', pos + 1)]
210 210 return self.extradata[-pos - 1][0]
211 211
212 212 def bsearch(self, key):
213 213 first = 0
214 214 last = len(self.positions) - 1
215 215
216 216 while first <= last:
217 217 midpoint = (first + last)//2
218 218 nextpos = self.positions[midpoint]
219 219 candidate = self._getkey(nextpos)
220 220 r = _cmp(key, candidate)
221 221 if r == 0:
222 222 return midpoint
223 223 else:
224 224 if r < 0:
225 225 last = midpoint - 1
226 226 else:
227 227 first = midpoint + 1
228 228 return -1
229 229
230 230 def bsearch2(self, key):
231 231 # same as the above, but will always return the position
232 232 # done for performance reasons
233 233 first = 0
234 234 last = len(self.positions) - 1
235 235
236 236 while first <= last:
237 237 midpoint = (first + last)//2
238 238 nextpos = self.positions[midpoint]
239 239 candidate = self._getkey(nextpos)
240 240 r = _cmp(key, candidate)
241 241 if r == 0:
242 242 return (midpoint, True)
243 243 else:
244 244 if r < 0:
245 245 last = midpoint - 1
246 246 else:
247 247 first = midpoint + 1
248 248 return (first, False)
249 249
250 250 def __contains__(self, key):
251 251 return self.bsearch(key) != -1
252 252
253 253 def _getflags(self, data, needle, pos):
254 254 start = pos + 41
255 255 end = data.find("\n", start)
256 256 if end == -1:
257 257 end = len(data) - 1
258 258 if start == end:
259 259 return ''
260 260 return self.data[start:end]
261 261
262 262 def __getitem__(self, key):
263 263 if not isinstance(key, bytes):
264 264 raise TypeError("getitem: manifest keys must be a bytes.")
265 265 needle = self.bsearch(key)
266 266 if needle == -1:
267 267 raise KeyError
268 268 data, pos = self._get(needle)
269 269 if pos == -1:
270 270 return (data[1], data[2])
271 271 zeropos = data.find('\x00', pos)
272 272 assert 0 <= needle <= len(self.positions)
273 273 assert len(self.extrainfo) == len(self.positions)
274 274 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
275 275 flags = self._getflags(data, needle, zeropos)
276 276 return (hashval, flags)
277 277
278 278 def __delitem__(self, key):
279 279 needle, found = self.bsearch2(key)
280 280 if not found:
281 281 raise KeyError
282 282 cur = self.positions[needle]
283 283 self.positions = self.positions[:needle] + self.positions[needle + 1:]
284 284 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
285 285 if cur >= 0:
286 286 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
287 287
288 288 def __setitem__(self, key, value):
289 289 if not isinstance(key, bytes):
290 290 raise TypeError("setitem: manifest keys must be a byte string.")
291 291 if not isinstance(value, tuple) or len(value) != 2:
292 292 raise TypeError("Manifest values must be a tuple of (node, flags).")
293 293 hashval = value[0]
294 294 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
295 295 raise TypeError("node must be a 20-byte byte string")
296 296 flags = value[1]
297 297 if len(hashval) == 22:
298 298 hashval = hashval[:-1]
299 299 if not isinstance(flags, bytes) or len(flags) > 1:
300 300 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
301 301 needle, found = self.bsearch2(key)
302 302 if found:
303 303 # put the item
304 304 pos = self.positions[needle]
305 305 if pos < 0:
306 306 self.extradata[-pos - 1] = (key, hashval, value[1])
307 307 else:
308 308 # just don't bother
309 309 self.extradata.append((key, hashval, value[1]))
310 310 self.positions[needle] = -len(self.extradata)
311 311 else:
312 312 # not found, put it in with extra positions
313 313 self.extradata.append((key, hashval, value[1]))
314 314 self.positions = (self.positions[:needle] + [-len(self.extradata)]
315 315 + self.positions[needle:])
316 316 self.extrainfo = (self.extrainfo[:needle] + [0] +
317 317 self.extrainfo[needle:])
318 318
319 319 def copy(self):
320 320 # XXX call _compact like in C?
321 321 return _lazymanifest(self.data, self.positions, self.extrainfo,
322 322 self.extradata)
323 323
324 324 def _compact(self):
325 325 # hopefully not called TOO often
326 326 if len(self.extradata) == 0:
327 327 return
328 328 l = []
329 329 last_cut = 0
330 330 i = 0
331 331 offset = 0
332 332 self.extrainfo = [0] * len(self.positions)
333 333 while i < len(self.positions):
334 334 if self.positions[i] >= 0:
335 335 cur = self.positions[i]
336 336 last_cut = cur
337 337 while True:
338 338 self.positions[i] = offset
339 339 i += 1
340 340 if i == len(self.positions) or self.positions[i] < 0:
341 341 break
342 342 offset += self.positions[i] - cur
343 343 cur = self.positions[i]
344 344 end_cut = self.data.find('\n', cur)
345 345 if end_cut != -1:
346 346 end_cut += 1
347 347 offset += end_cut - cur
348 348 l.append(self.data[last_cut:end_cut])
349 349 else:
350 350 while i < len(self.positions) and self.positions[i] < 0:
351 351 cur = self.positions[i]
352 352 t = self.extradata[-cur - 1]
353 353 l.append(self._pack(t))
354 354 self.positions[i] = offset
355 355 if len(t[1]) > 20:
356 356 self.extrainfo[i] = ord(t[1][21])
357 357 offset += len(l[-1])
358 358 i += 1
359 359 self.data = ''.join(l)
360 360 self.extradata = []
361 361
362 362 def _pack(self, d):
363 363 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
364 364
365 365 def text(self):
366 366 self._compact()
367 367 return self.data
368 368
369 369 def diff(self, m2, clean=False):
370 370 '''Finds changes between the current manifest and m2.'''
371 371 # XXX think whether efficiency matters here
372 372 diff = {}
373 373
374 374 for fn, e1, flags in self.iterentries():
375 375 if fn not in m2:
376 376 diff[fn] = (e1, flags), (None, '')
377 377 else:
378 378 e2 = m2[fn]
379 379 if (e1, flags) != e2:
380 380 diff[fn] = (e1, flags), e2
381 381 elif clean:
382 382 diff[fn] = None
383 383
384 384 for fn, e2, flags in m2.iterentries():
385 385 if fn not in self:
386 386 diff[fn] = (None, ''), (e2, flags)
387 387
388 388 return diff
389 389
390 390 def iterentries(self):
391 391 return lazymanifestiterentries(self)
392 392
393 393 def iterkeys(self):
394 394 return lazymanifestiter(self)
395 395
396 396 def __iter__(self):
397 397 return lazymanifestiter(self)
398 398
399 399 def __len__(self):
400 400 return len(self.positions)
401 401
402 402 def filtercopy(self, filterfn):
403 403 # XXX should be optimized
404 404 c = _lazymanifest('')
405 405 for f, n, fl in self.iterentries():
406 406 if filterfn(f):
407 407 c[f] = n, fl
408 408 return c
409 409
410 410 try:
411 411 _lazymanifest = parsers.lazymanifest
412 412 except AttributeError:
413 413 pass
414 414
415 415 class manifestdict(object):
416 416 def __init__(self, data=''):
417 417 if data.startswith('\0'):
418 418 #_lazymanifest can not parse v2
419 419 self._lm = _lazymanifest('')
420 420 for f, n, fl in _parsev2(data):
421 421 self._lm[f] = n, fl
422 422 else:
423 423 self._lm = _lazymanifest(data)
424 424
425 425 def __getitem__(self, key):
426 426 return self._lm[key][0]
427 427
428 428 def find(self, key):
429 429 return self._lm[key]
430 430
431 431 def __len__(self):
432 432 return len(self._lm)
433 433
434 434 def __nonzero__(self):
435 435 # nonzero is covered by the __len__ function, but implementing it here
436 436 # makes it easier for extensions to override.
437 437 return len(self._lm) != 0
438 438
439 439 __bool__ = __nonzero__
440 440
441 441 def __setitem__(self, key, node):
442 442 self._lm[key] = node, self.flags(key, '')
443 443
444 444 def __contains__(self, key):
445 445 if key is None:
446 446 return False
447 447 return key in self._lm
448 448
449 449 def __delitem__(self, key):
450 450 del self._lm[key]
451 451
452 452 def __iter__(self):
453 453 return self._lm.__iter__()
454 454
455 455 def iterkeys(self):
456 456 return self._lm.iterkeys()
457 457
458 458 def keys(self):
459 459 return list(self.iterkeys())
460 460
461 461 def filesnotin(self, m2, match=None):
462 462 '''Set of files in this manifest that are not in the other'''
463 463 if match:
464 464 m1 = self.matches(match)
465 465 m2 = m2.matches(match)
466 466 return m1.filesnotin(m2)
467 467 diff = self.diff(m2)
468 468 files = set(filepath
469 469 for filepath, hashflags in diff.iteritems()
470 470 if hashflags[1][0] is None)
471 471 return files
472 472
473 473 @propertycache
474 474 def _dirs(self):
475 475 return util.dirs(self)
476 476
477 477 def dirs(self):
478 478 return self._dirs
479 479
480 480 def hasdir(self, dir):
481 481 return dir in self._dirs
482 482
483 483 def _filesfastpath(self, match):
484 484 '''Checks whether we can correctly and quickly iterate over matcher
485 485 files instead of over manifest files.'''
486 486 files = match.files()
487 487 return (len(files) < 100 and (match.isexact() or
488 488 (match.prefix() and all(fn in self for fn in files))))
489 489
490 490 def walk(self, match):
491 491 '''Generates matching file names.
492 492
493 493 Equivalent to manifest.matches(match).iterkeys(), but without creating
494 494 an entirely new manifest.
495 495
496 496 It also reports nonexistent files by marking them bad with match.bad().
497 497 '''
498 498 if match.always():
499 499 for f in iter(self):
500 500 yield f
501 501 return
502 502
503 503 fset = set(match.files())
504 504
505 505 # avoid the entire walk if we're only looking for specific files
506 506 if self._filesfastpath(match):
507 507 for fn in sorted(fset):
508 508 yield fn
509 509 return
510 510
511 511 for fn in self:
512 512 if fn in fset:
513 513 # specified pattern is the exact name
514 514 fset.remove(fn)
515 515 if match(fn):
516 516 yield fn
517 517
518 518 # for dirstate.walk, files=['.'] means "walk the whole tree".
519 519 # follow that here, too
520 520 fset.discard('.')
521 521
522 522 for fn in sorted(fset):
523 523 if not self.hasdir(fn):
524 524 match.bad(fn, None)
525 525
526 526 def matches(self, match):
527 527 '''generate a new manifest filtered by the match argument'''
528 528 if match.always():
529 529 return self.copy()
530 530
531 531 if self._filesfastpath(match):
532 532 m = manifestdict()
533 533 lm = self._lm
534 534 for fn in match.files():
535 535 if fn in lm:
536 536 m._lm[fn] = lm[fn]
537 537 return m
538 538
539 539 m = manifestdict()
540 540 m._lm = self._lm.filtercopy(match)
541 541 return m
542 542
543 543 def diff(self, m2, match=None, clean=False):
544 544 '''Finds changes between the current manifest and m2.
545 545
546 546 Args:
547 547 m2: the manifest to which this manifest should be compared.
548 548 clean: if true, include files unchanged between these manifests
549 549 with a None value in the returned dictionary.
550 550
551 551 The result is returned as a dict with filename as key and
552 552 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
553 553 nodeid in the current/other manifest and fl1/fl2 is the flag
554 554 in the current/other manifest. Where the file does not exist,
555 555 the nodeid will be None and the flags will be the empty
556 556 string.
557 557 '''
558 558 if match:
559 559 m1 = self.matches(match)
560 560 m2 = m2.matches(match)
561 561 return m1.diff(m2, clean=clean)
562 562 return self._lm.diff(m2._lm, clean)
563 563
564 564 def setflag(self, key, flag):
565 565 self._lm[key] = self[key], flag
566 566
567 567 def get(self, key, default=None):
568 568 try:
569 569 return self._lm[key][0]
570 570 except KeyError:
571 571 return default
572 572
573 573 def flags(self, key, default=''):
574 574 try:
575 575 return self._lm[key][1]
576 576 except KeyError:
577 577 return default
578 578
579 579 def copy(self):
580 580 c = manifestdict()
581 581 c._lm = self._lm.copy()
582 582 return c
583 583
584 584 def items(self):
585 585 return (x[:2] for x in self._lm.iterentries())
586 586
587 587 iteritems = items
588 588
589 589 def iterentries(self):
590 590 return self._lm.iterentries()
591 591
592 592 def text(self, usemanifestv2=False):
593 593 if usemanifestv2:
594 594 return _textv2(self._lm.iterentries())
595 595 else:
596 596 # use (probably) native version for v1
597 597 return self._lm.text()
598 598
599 599 def fastdelta(self, base, changes):
600 600 """Given a base manifest text as a bytearray and a list of changes
601 601 relative to that text, compute a delta that can be used by revlog.
602 602 """
603 603 delta = []
604 604 dstart = None
605 605 dend = None
606 606 dline = [""]
607 607 start = 0
608 608 # zero copy representation of base as a buffer
609 609 addbuf = util.buffer(base)
610 610
611 611 changes = list(changes)
612 612 if len(changes) < 1000:
613 613 # start with a readonly loop that finds the offset of
614 614 # each line and creates the deltas
615 615 for f, todelete in changes:
616 616 # bs will either be the index of the item or the insert point
617 617 start, end = _msearch(addbuf, f, start)
618 618 if not todelete:
619 619 h, fl = self._lm[f]
620 620 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
621 621 else:
622 622 if start == end:
623 623 # item we want to delete was not found, error out
624 624 raise AssertionError(
625 625 _("failed to remove %s from manifest") % f)
626 626 l = ""
627 627 if dstart is not None and dstart <= start and dend >= start:
628 628 if dend < end:
629 629 dend = end
630 630 if l:
631 631 dline.append(l)
632 632 else:
633 633 if dstart is not None:
634 634 delta.append([dstart, dend, "".join(dline)])
635 635 dstart = start
636 636 dend = end
637 637 dline = [l]
638 638
639 639 if dstart is not None:
640 640 delta.append([dstart, dend, "".join(dline)])
641 641 # apply the delta to the base, and get a delta for addrevision
642 642 deltatext, arraytext = _addlistdelta(base, delta)
643 643 else:
644 644 # For large changes, it's much cheaper to just build the text and
645 645 # diff it.
646 646 arraytext = bytearray(self.text())
647 647 deltatext = mdiff.textdiff(
648 648 util.buffer(base), util.buffer(arraytext))
649 649
650 650 return arraytext, deltatext
651 651
652 652 def _msearch(m, s, lo=0, hi=None):
653 653 '''return a tuple (start, end) that says where to find s within m.
654 654
655 655 If the string is found m[start:end] are the line containing
656 656 that string. If start == end the string was not found and
657 657 they indicate the proper sorted insertion point.
658 658
659 659 m should be a buffer, a memoryview or a byte string.
660 660 s is a byte string'''
661 661 def advance(i, c):
662 662 while i < lenm and m[i:i + 1] != c:
663 663 i += 1
664 664 return i
665 665 if not s:
666 666 return (lo, lo)
667 667 lenm = len(m)
668 668 if not hi:
669 669 hi = lenm
670 670 while lo < hi:
671 671 mid = (lo + hi) // 2
672 672 start = mid
673 673 while start > 0 and m[start - 1:start] != '\n':
674 674 start -= 1
675 675 end = advance(start, '\0')
676 676 if bytes(m[start:end]) < s:
677 677 # we know that after the null there are 40 bytes of sha1
678 678 # this translates to the bisect lo = mid + 1
679 679 lo = advance(end + 40, '\n') + 1
680 680 else:
681 681 # this translates to the bisect hi = mid
682 682 hi = start
683 683 end = advance(lo, '\0')
684 684 found = m[lo:end]
685 685 if s == found:
686 686 # we know that after the null there are 40 bytes of sha1
687 687 end = advance(end + 40, '\n')
688 688 return (lo, end + 1)
689 689 else:
690 690 return (lo, lo)
691 691
692 692 def _checkforbidden(l):
693 693 """Check filenames for illegal characters."""
694 694 for f in l:
695 695 if '\n' in f or '\r' in f:
696 696 raise error.RevlogError(
697 697 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
698 698
699 699
700 700 # apply the changes collected during the bisect loop to our addlist
701 701 # return a delta suitable for addrevision
702 702 def _addlistdelta(addlist, x):
703 703 # for large addlist arrays, building a new array is cheaper
704 704 # than repeatedly modifying the existing one
705 705 currentposition = 0
706 706 newaddlist = bytearray()
707 707
708 708 for start, end, content in x:
709 709 newaddlist += addlist[currentposition:start]
710 710 if content:
711 711 newaddlist += bytearray(content)
712 712
713 713 currentposition = end
714 714
715 715 newaddlist += addlist[currentposition:]
716 716
717 717 deltatext = "".join(struct.pack(">lll", start, end, len(content))
718 718 + content for start, end, content in x)
719 719 return deltatext, newaddlist
720 720
721 721 def _splittopdir(f):
722 722 if '/' in f:
723 723 dir, subpath = f.split('/', 1)
724 724 return dir + '/', subpath
725 725 else:
726 726 return '', f
727 727
728 728 _noop = lambda s: None
729 729
730 730 class treemanifest(object):
731 731 def __init__(self, dir='', text=''):
732 732 self._dir = dir
733 733 self._node = revlog.nullid
734 734 self._loadfunc = _noop
735 735 self._copyfunc = _noop
736 736 self._dirty = False
737 737 self._dirs = {}
738 738 # Using _lazymanifest here is a little slower than plain old dicts
739 739 self._files = {}
740 740 self._flags = {}
741 741 if text:
742 742 def readsubtree(subdir, subm):
743 743 raise AssertionError('treemanifest constructor only accepts '
744 744 'flat manifests')
745 745 self.parse(text, readsubtree)
746 746 self._dirty = True # Mark flat manifest dirty after parsing
747 747
748 748 def _subpath(self, path):
749 749 return self._dir + path
750 750
751 751 def __len__(self):
752 752 self._load()
753 753 size = len(self._files)
754 754 for m in self._dirs.values():
755 755 size += m.__len__()
756 756 return size
757 757
758 758 def _isempty(self):
759 759 self._load() # for consistency; already loaded by all callers
760 760 return (not self._files and (not self._dirs or
761 761 all(m._isempty() for m in self._dirs.values())))
762 762
763 763 def __repr__(self):
764 764 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
765 765 (self._dir, revlog.hex(self._node),
766 766 bool(self._loadfunc is _noop),
767 767 self._dirty, id(self)))
768 768
769 769 def dir(self):
770 770 '''The directory that this tree manifest represents, including a
771 771 trailing '/'. Empty string for the repo root directory.'''
772 772 return self._dir
773 773
774 774 def node(self):
775 775 '''This node of this instance. nullid for unsaved instances. Should
776 776 be updated when the instance is read or written from a revlog.
777 777 '''
778 778 assert not self._dirty
779 779 return self._node
780 780
781 781 def setnode(self, node):
782 782 self._node = node
783 783 self._dirty = False
784 784
785 785 def iterentries(self):
786 786 self._load()
787 787 for p, n in sorted(itertools.chain(self._dirs.items(),
788 788 self._files.items())):
789 789 if p in self._files:
790 790 yield self._subpath(p), n, self._flags.get(p, '')
791 791 else:
792 792 for x in n.iterentries():
793 793 yield x
794 794
795 795 def items(self):
796 796 self._load()
797 797 for p, n in sorted(itertools.chain(self._dirs.items(),
798 798 self._files.items())):
799 799 if p in self._files:
800 800 yield self._subpath(p), n
801 801 else:
802 802 for f, sn in n.iteritems():
803 803 yield f, sn
804 804
805 805 iteritems = items
806 806
807 807 def iterkeys(self):
808 808 self._load()
809 809 for p in sorted(itertools.chain(self._dirs, self._files)):
810 810 if p in self._files:
811 811 yield self._subpath(p)
812 812 else:
813 813 for f in self._dirs[p]:
814 814 yield f
815 815
816 816 def keys(self):
817 817 return list(self.iterkeys())
818 818
819 819 def __iter__(self):
820 820 return self.iterkeys()
821 821
822 822 def __contains__(self, f):
823 823 if f is None:
824 824 return False
825 825 self._load()
826 826 dir, subpath = _splittopdir(f)
827 827 if dir:
828 828 if dir not in self._dirs:
829 829 return False
830 830 return self._dirs[dir].__contains__(subpath)
831 831 else:
832 832 return f in self._files
833 833
834 834 def get(self, f, default=None):
835 835 self._load()
836 836 dir, subpath = _splittopdir(f)
837 837 if dir:
838 838 if dir not in self._dirs:
839 839 return default
840 840 return self._dirs[dir].get(subpath, default)
841 841 else:
842 842 return self._files.get(f, default)
843 843
844 844 def __getitem__(self, f):
845 845 self._load()
846 846 dir, subpath = _splittopdir(f)
847 847 if dir:
848 848 return self._dirs[dir].__getitem__(subpath)
849 849 else:
850 850 return self._files[f]
851 851
852 852 def flags(self, f):
853 853 self._load()
854 854 dir, subpath = _splittopdir(f)
855 855 if dir:
856 856 if dir not in self._dirs:
857 857 return ''
858 858 return self._dirs[dir].flags(subpath)
859 859 else:
860 860 if f in self._dirs:
861 861 return ''
862 862 return self._flags.get(f, '')
863 863
864 864 def find(self, f):
865 865 self._load()
866 866 dir, subpath = _splittopdir(f)
867 867 if dir:
868 868 return self._dirs[dir].find(subpath)
869 869 else:
870 870 return self._files[f], self._flags.get(f, '')
871 871
872 872 def __delitem__(self, f):
873 873 self._load()
874 874 dir, subpath = _splittopdir(f)
875 875 if dir:
876 876 self._dirs[dir].__delitem__(subpath)
877 877 # If the directory is now empty, remove it
878 878 if self._dirs[dir]._isempty():
879 879 del self._dirs[dir]
880 880 else:
881 881 del self._files[f]
882 882 if f in self._flags:
883 883 del self._flags[f]
884 884 self._dirty = True
885 885
886 886 def __setitem__(self, f, n):
887 887 assert n is not None
888 888 self._load()
889 889 dir, subpath = _splittopdir(f)
890 890 if dir:
891 891 if dir not in self._dirs:
892 892 self._dirs[dir] = treemanifest(self._subpath(dir))
893 893 self._dirs[dir].__setitem__(subpath, n)
894 894 else:
895 895 self._files[f] = n[:21] # to match manifestdict's behavior
896 896 self._dirty = True
897 897
898 898 def _load(self):
899 899 if self._loadfunc is not _noop:
900 900 lf, self._loadfunc = self._loadfunc, _noop
901 901 lf(self)
902 902 elif self._copyfunc is not _noop:
903 903 cf, self._copyfunc = self._copyfunc, _noop
904 904 cf(self)
905 905
906 906 def setflag(self, f, flags):
907 907 """Set the flags (symlink, executable) for path f."""
908 908 self._load()
909 909 dir, subpath = _splittopdir(f)
910 910 if dir:
911 911 if dir not in self._dirs:
912 912 self._dirs[dir] = treemanifest(self._subpath(dir))
913 913 self._dirs[dir].setflag(subpath, flags)
914 914 else:
915 915 self._flags[f] = flags
916 916 self._dirty = True
917 917
918 918 def copy(self):
919 919 copy = treemanifest(self._dir)
920 920 copy._node = self._node
921 921 copy._dirty = self._dirty
922 922 if self._copyfunc is _noop:
923 923 def _copyfunc(s):
924 924 self._load()
925 925 for d in self._dirs:
926 926 s._dirs[d] = self._dirs[d].copy()
927 927 s._files = dict.copy(self._files)
928 928 s._flags = dict.copy(self._flags)
929 929 if self._loadfunc is _noop:
930 930 _copyfunc(copy)
931 931 else:
932 932 copy._copyfunc = _copyfunc
933 933 else:
934 934 copy._copyfunc = self._copyfunc
935 935 return copy
936 936
937 937 def filesnotin(self, m2, match=None):
938 938 '''Set of files in this manifest that are not in the other'''
939 939 if match:
940 940 m1 = self.matches(match)
941 941 m2 = m2.matches(match)
942 942 return m1.filesnotin(m2)
943 943
944 944 files = set()
945 945 def _filesnotin(t1, t2):
946 946 if t1._node == t2._node and not t1._dirty and not t2._dirty:
947 947 return
948 948 t1._load()
949 949 t2._load()
950 950 for d, m1 in t1._dirs.iteritems():
951 951 if d in t2._dirs:
952 952 m2 = t2._dirs[d]
953 953 _filesnotin(m1, m2)
954 954 else:
955 955 files.update(m1.iterkeys())
956 956
957 957 for fn in t1._files.iterkeys():
958 958 if fn not in t2._files:
959 959 files.add(t1._subpath(fn))
960 960
961 961 _filesnotin(self, m2)
962 962 return files
963 963
964 964 @propertycache
965 965 def _alldirs(self):
966 966 return util.dirs(self)
967 967
968 968 def dirs(self):
969 969 return self._alldirs
970 970
971 971 def hasdir(self, dir):
972 972 self._load()
973 973 topdir, subdir = _splittopdir(dir)
974 974 if topdir:
975 975 if topdir in self._dirs:
976 976 return self._dirs[topdir].hasdir(subdir)
977 977 return False
978 978 return (dir + '/') in self._dirs
979 979
980 980 def walk(self, match):
981 981 '''Generates matching file names.
982 982
983 983 Equivalent to manifest.matches(match).iterkeys(), but without creating
984 984 an entirely new manifest.
985 985
986 986 It also reports nonexistent files by marking them bad with match.bad().
987 987 '''
988 988 if match.always():
989 989 for f in iter(self):
990 990 yield f
991 991 return
992 992
993 993 fset = set(match.files())
994 994
995 995 for fn in self._walk(match):
996 996 if fn in fset:
997 997 # specified pattern is the exact name
998 998 fset.remove(fn)
999 999 yield fn
1000 1000
1001 1001 # for dirstate.walk, files=['.'] means "walk the whole tree".
1002 1002 # follow that here, too
1003 1003 fset.discard('.')
1004 1004
1005 1005 for fn in sorted(fset):
1006 1006 if not self.hasdir(fn):
1007 1007 match.bad(fn, None)
1008 1008
1009 1009 def _walk(self, match):
1010 1010 '''Recursively generates matching file names for walk().'''
1011 1011 if not match.visitdir(self._dir[:-1] or '.'):
1012 1012 return
1013 1013
1014 1014 # yield this dir's files and walk its submanifests
1015 1015 self._load()
1016 1016 for p in sorted(self._dirs.keys() + self._files.keys()):
1017 1017 if p in self._files:
1018 1018 fullp = self._subpath(p)
1019 1019 if match(fullp):
1020 1020 yield fullp
1021 1021 else:
1022 1022 for f in self._dirs[p]._walk(match):
1023 1023 yield f
1024 1024
1025 1025 def matches(self, match):
1026 1026 '''generate a new manifest filtered by the match argument'''
1027 1027 if match.always():
1028 1028 return self.copy()
1029 1029
1030 1030 return self._matches(match)
1031 1031
1032 1032 def _matches(self, match):
1033 1033 '''recursively generate a new manifest filtered by the match argument.
1034 1034 '''
1035 1035
1036 1036 visit = match.visitdir(self._dir[:-1] or '.')
1037 1037 if visit == 'all':
1038 1038 return self.copy()
1039 1039 ret = treemanifest(self._dir)
1040 1040 if not visit:
1041 1041 return ret
1042 1042
1043 1043 self._load()
1044 1044 for fn in self._files:
1045 1045 fullp = self._subpath(fn)
1046 1046 if not match(fullp):
1047 1047 continue
1048 1048 ret._files[fn] = self._files[fn]
1049 1049 if fn in self._flags:
1050 1050 ret._flags[fn] = self._flags[fn]
1051 1051
1052 1052 for dir, subm in self._dirs.iteritems():
1053 1053 m = subm._matches(match)
1054 1054 if not m._isempty():
1055 1055 ret._dirs[dir] = m
1056 1056
1057 1057 if not ret._isempty():
1058 1058 ret._dirty = True
1059 1059 return ret
1060 1060
1061 1061 def diff(self, m2, match=None, clean=False):
1062 1062 '''Finds changes between the current manifest and m2.
1063 1063
1064 1064 Args:
1065 1065 m2: the manifest to which this manifest should be compared.
1066 1066 clean: if true, include files unchanged between these manifests
1067 1067 with a None value in the returned dictionary.
1068 1068
1069 1069 The result is returned as a dict with filename as key and
1070 1070 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1071 1071 nodeid in the current/other manifest and fl1/fl2 is the flag
1072 1072 in the current/other manifest. Where the file does not exist,
1073 1073 the nodeid will be None and the flags will be the empty
1074 1074 string.
1075 1075 '''
1076 1076 if match:
1077 1077 m1 = self.matches(match)
1078 1078 m2 = m2.matches(match)
1079 1079 return m1.diff(m2, clean=clean)
1080 1080 result = {}
1081 1081 emptytree = treemanifest()
1082 1082 def _diff(t1, t2):
1083 1083 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1084 1084 return
1085 1085 t1._load()
1086 1086 t2._load()
1087 1087 for d, m1 in t1._dirs.iteritems():
1088 1088 m2 = t2._dirs.get(d, emptytree)
1089 1089 _diff(m1, m2)
1090 1090
1091 1091 for d, m2 in t2._dirs.iteritems():
1092 1092 if d not in t1._dirs:
1093 1093 _diff(emptytree, m2)
1094 1094
1095 1095 for fn, n1 in t1._files.iteritems():
1096 1096 fl1 = t1._flags.get(fn, '')
1097 1097 n2 = t2._files.get(fn, None)
1098 1098 fl2 = t2._flags.get(fn, '')
1099 1099 if n1 != n2 or fl1 != fl2:
1100 1100 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1101 1101 elif clean:
1102 1102 result[t1._subpath(fn)] = None
1103 1103
1104 1104 for fn, n2 in t2._files.iteritems():
1105 1105 if fn not in t1._files:
1106 1106 fl2 = t2._flags.get(fn, '')
1107 1107 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
1108 1108
1109 1109 _diff(self, m2)
1110 1110 return result
1111 1111
1112 1112 def unmodifiedsince(self, m2):
1113 1113 return not self._dirty and not m2._dirty and self._node == m2._node
1114 1114
1115 1115 def parse(self, text, readsubtree):
1116 1116 for f, n, fl in _parse(text):
1117 1117 if fl == 't':
1118 1118 f = f + '/'
1119 1119 self._dirs[f] = readsubtree(self._subpath(f), n)
1120 1120 elif '/' in f:
1121 1121 # This is a flat manifest, so use __setitem__ and setflag rather
1122 1122 # than assigning directly to _files and _flags, so we can
1123 1123 # assign a path in a subdirectory, and to mark dirty (compared
1124 1124 # to nullid).
1125 1125 self[f] = n
1126 1126 if fl:
1127 1127 self.setflag(f, fl)
1128 1128 else:
1129 1129 # Assigning to _files and _flags avoids marking as dirty,
1130 1130 # and should be a little faster.
1131 1131 self._files[f] = n
1132 1132 if fl:
1133 1133 self._flags[f] = fl
1134 1134
1135 1135 def text(self, usemanifestv2=False):
1136 1136 """Get the full data of this manifest as a bytestring."""
1137 1137 self._load()
1138 1138 return _text(self.iterentries(), usemanifestv2)
1139 1139
1140 1140 def dirtext(self, usemanifestv2=False):
1141 1141 """Get the full data of this directory as a bytestring. Make sure that
1142 1142 any submanifests have been written first, so their nodeids are correct.
1143 1143 """
1144 1144 self._load()
1145 1145 flags = self.flags
1146 1146 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1147 1147 files = [(f, self._files[f], flags(f)) for f in self._files]
1148 1148 return _text(sorted(dirs + files), usemanifestv2)
1149 1149
1150 1150 def read(self, gettext, readsubtree):
1151 1151 def _load_for_read(s):
1152 1152 s.parse(gettext(), readsubtree)
1153 1153 s._dirty = False
1154 1154 self._loadfunc = _load_for_read
1155 1155
1156 1156 def writesubtrees(self, m1, m2, writesubtree):
1157 1157 self._load() # for consistency; should never have any effect here
1158 1158 m1._load()
1159 1159 m2._load()
1160 1160 emptytree = treemanifest()
1161 1161 for d, subm in self._dirs.iteritems():
1162 1162 subp1 = m1._dirs.get(d, emptytree)._node
1163 1163 subp2 = m2._dirs.get(d, emptytree)._node
1164 1164 if subp1 == revlog.nullid:
1165 1165 subp1, subp2 = subp2, subp1
1166 1166 writesubtree(subm, subp1, subp2)
1167 1167
1168 1168 def walksubtrees(self, matcher=None):
1169 1169 """Returns an iterator of the subtrees of this manifest, including this
1170 1170 manifest itself.
1171 1171
1172 1172 If `matcher` is provided, it only returns subtrees that match.
1173 1173 """
1174 1174 if matcher and not matcher.visitdir(self._dir[:-1] or '.'):
1175 1175 return
1176 1176 if not matcher or matcher(self._dir[:-1]):
1177 1177 yield self
1178 1178
1179 1179 self._load()
1180 1180 for d, subm in self._dirs.iteritems():
1181 1181 for subtree in subm.walksubtrees(matcher=matcher):
1182 1182 yield subtree
1183 1183
1184 1184 class manifestrevlog(revlog.revlog):
1185 1185 '''A revlog that stores manifest texts. This is responsible for caching the
1186 1186 full-text manifest contents.
1187 1187 '''
1188 1188 def __init__(self, opener, dir='', dirlogcache=None, indexfile=None,
1189 1189 treemanifest=False):
1190 1190 """Constructs a new manifest revlog
1191 1191
1192 1192 `indexfile` - used by extensions to have two manifests at once, like
1193 1193 when transitioning between flatmanifeset and treemanifests.
1194 1194
1195 1195 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1196 1196 options can also be used to make this a tree manifest revlog. The opener
1197 1197 option takes precedence, so if it is set to True, we ignore whatever
1198 1198 value is passed in to the constructor.
1199 1199 """
1200 1200 # During normal operations, we expect to deal with not more than four
1201 1201 # revs at a time (such as during commit --amend). When rebasing large
1202 1202 # stacks of commits, the number can go up, hence the config knob below.
1203 1203 cachesize = 4
1204 1204 optiontreemanifest = False
1205 1205 usemanifestv2 = False
1206 1206 opts = getattr(opener, 'options', None)
1207 1207 if opts is not None:
1208 1208 cachesize = opts.get('manifestcachesize', cachesize)
1209 1209 optiontreemanifest = opts.get('treemanifest', False)
1210 1210 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
1211 1211
1212 1212 self._treeondisk = optiontreemanifest or treemanifest
1213 1213 self._usemanifestv2 = usemanifestv2
1214 1214
1215 1215 self._fulltextcache = util.lrucachedict(cachesize)
1216 1216
1217 1217 if dir:
1218 1218 assert self._treeondisk, 'opts is %r' % opts
1219 1219 if not dir.endswith('/'):
1220 1220 dir = dir + '/'
1221 1221
1222 1222 if indexfile is None:
1223 1223 indexfile = '00manifest.i'
1224 1224 if dir:
1225 1225 indexfile = "meta/" + dir + indexfile
1226 1226
1227 1227 self._dir = dir
1228 1228 # The dirlogcache is kept on the root manifest log
1229 1229 if dir:
1230 1230 self._dirlogcache = dirlogcache
1231 1231 else:
1232 1232 self._dirlogcache = {'': self}
1233 1233
1234 1234 super(manifestrevlog, self).__init__(opener, indexfile,
1235 1235 # only root indexfile is cached
1236 1236 checkambig=not bool(dir),
1237 1237 mmaplargeindex=True)
1238 1238
1239 1239 @property
1240 1240 def fulltextcache(self):
1241 1241 return self._fulltextcache
1242 1242
1243 1243 def clearcaches(self):
1244 1244 super(manifestrevlog, self).clearcaches()
1245 1245 self._fulltextcache.clear()
1246 1246 self._dirlogcache = {'': self}
1247 1247
1248 def dirlog(self, dir):
1249 if dir:
1248 def dirlog(self, d):
1249 if d:
1250 1250 assert self._treeondisk
1251 if dir not in self._dirlogcache:
1252 mfrevlog = manifestrevlog(self.opener, dir,
1251 if d not in self._dirlogcache:
1252 mfrevlog = manifestrevlog(self.opener, d,
1253 1253 self._dirlogcache,
1254 1254 treemanifest=self._treeondisk)
1255 self._dirlogcache[dir] = mfrevlog
1256 return self._dirlogcache[dir]
1255 self._dirlogcache[d] = mfrevlog
1256 return self._dirlogcache[d]
1257 1257
1258 1258 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
1259 1259 if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta')
1260 1260 and not self._usemanifestv2):
1261 1261 # If our first parent is in the manifest cache, we can
1262 1262 # compute a delta here using properties we know about the
1263 1263 # manifest up-front, which may save time later for the
1264 1264 # revlog layer.
1265 1265
1266 1266 _checkforbidden(added)
1267 1267 # combine the changed lists into one sorted iterator
1268 1268 work = heapq.merge([(x, False) for x in added],
1269 1269 [(x, True) for x in removed])
1270 1270
1271 1271 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1272 1272 cachedelta = self.rev(p1), deltatext
1273 1273 text = util.buffer(arraytext)
1274 1274 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
1275 1275 else:
1276 1276 # The first parent manifest isn't already loaded, so we'll
1277 1277 # just encode a fulltext of the manifest and pass that
1278 1278 # through to the revlog layer, and let it handle the delta
1279 1279 # process.
1280 1280 if self._treeondisk:
1281 1281 assert readtree, "readtree must be set for treemanifest writes"
1282 1282 m1 = readtree(self._dir, p1)
1283 1283 m2 = readtree(self._dir, p2)
1284 1284 n = self._addtree(m, transaction, link, m1, m2, readtree)
1285 1285 arraytext = None
1286 1286 else:
1287 1287 text = m.text(self._usemanifestv2)
1288 1288 n = self.addrevision(text, transaction, link, p1, p2)
1289 1289 arraytext = bytearray(text)
1290 1290
1291 1291 if arraytext is not None:
1292 1292 self.fulltextcache[n] = arraytext
1293 1293
1294 1294 return n
1295 1295
1296 1296 def _addtree(self, m, transaction, link, m1, m2, readtree):
1297 1297 # If the manifest is unchanged compared to one parent,
1298 1298 # don't write a new revision
1299 1299 if self._dir != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(m2)):
1300 1300 return m.node()
1301 1301 def writesubtree(subm, subp1, subp2):
1302 1302 sublog = self.dirlog(subm.dir())
1303 1303 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1304 1304 readtree=readtree)
1305 1305 m.writesubtrees(m1, m2, writesubtree)
1306 1306 text = m.dirtext(self._usemanifestv2)
1307 1307 n = None
1308 1308 if self._dir != '':
1309 1309 # Double-check whether contents are unchanged to one parent
1310 1310 if text == m1.dirtext(self._usemanifestv2):
1311 1311 n = m1.node()
1312 1312 elif text == m2.dirtext(self._usemanifestv2):
1313 1313 n = m2.node()
1314 1314
1315 1315 if not n:
1316 1316 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1317 1317
1318 1318 # Save nodeid so parent manifest can calculate its nodeid
1319 1319 m.setnode(n)
1320 1320 return n
1321 1321
1322 1322 class manifestlog(object):
1323 1323 """A collection class representing the collection of manifest snapshots
1324 1324 referenced by commits in the repository.
1325 1325
1326 1326 In this situation, 'manifest' refers to the abstract concept of a snapshot
1327 1327 of the list of files in the given commit. Consumers of the output of this
1328 1328 class do not care about the implementation details of the actual manifests
1329 1329 they receive (i.e. tree or flat or lazily loaded, etc)."""
1330 1330 def __init__(self, opener, repo):
1331 1331 usetreemanifest = False
1332 1332 cachesize = 4
1333 1333
1334 1334 opts = getattr(opener, 'options', None)
1335 1335 if opts is not None:
1336 1336 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1337 1337 cachesize = opts.get('manifestcachesize', cachesize)
1338 1338 self._treeinmem = usetreemanifest
1339 1339
1340 1340 self._revlog = repo._constructmanifest()
1341 1341
1342 1342 # A cache of the manifestctx or treemanifestctx for each directory
1343 1343 self._dirmancache = {}
1344 1344 self._dirmancache[''] = util.lrucachedict(cachesize)
1345 1345
1346 1346 self.cachesize = cachesize
1347 1347
1348 1348 def __getitem__(self, node):
1349 1349 """Retrieves the manifest instance for the given node. Throws a
1350 1350 LookupError if not found.
1351 1351 """
1352 1352 return self.get('', node)
1353 1353
1354 1354 def get(self, dir, node, verify=True):
1355 1355 """Retrieves the manifest instance for the given node. Throws a
1356 1356 LookupError if not found.
1357 1357
1358 1358 `verify` - if True an exception will be thrown if the node is not in
1359 1359 the revlog
1360 1360 """
1361 1361 if node in self._dirmancache.get(dir, ()):
1362 1362 return self._dirmancache[dir][node]
1363 1363
1364 1364 if dir:
1365 1365 if self._revlog._treeondisk:
1366 1366 if verify:
1367 1367 dirlog = self._revlog.dirlog(dir)
1368 1368 if node not in dirlog.nodemap:
1369 1369 raise LookupError(node, dirlog.indexfile,
1370 1370 _('no node'))
1371 1371 m = treemanifestctx(self, dir, node)
1372 1372 else:
1373 1373 raise error.Abort(
1374 1374 _("cannot ask for manifest directory '%s' in a flat "
1375 1375 "manifest") % dir)
1376 1376 else:
1377 1377 if verify:
1378 1378 if node not in self._revlog.nodemap:
1379 1379 raise LookupError(node, self._revlog.indexfile,
1380 1380 _('no node'))
1381 1381 if self._treeinmem:
1382 1382 m = treemanifestctx(self, '', node)
1383 1383 else:
1384 1384 m = manifestctx(self, node)
1385 1385
1386 1386 if node != revlog.nullid:
1387 1387 mancache = self._dirmancache.get(dir)
1388 1388 if not mancache:
1389 1389 mancache = util.lrucachedict(self.cachesize)
1390 1390 self._dirmancache[dir] = mancache
1391 1391 mancache[node] = m
1392 1392 return m
1393 1393
1394 1394 def clearcaches(self):
1395 1395 self._dirmancache.clear()
1396 1396 self._revlog.clearcaches()
1397 1397
1398 1398 class memmanifestctx(object):
1399 1399 def __init__(self, manifestlog):
1400 1400 self._manifestlog = manifestlog
1401 1401 self._manifestdict = manifestdict()
1402 1402
1403 1403 def _revlog(self):
1404 1404 return self._manifestlog._revlog
1405 1405
1406 1406 def new(self):
1407 1407 return memmanifestctx(self._manifestlog)
1408 1408
1409 1409 def copy(self):
1410 1410 memmf = memmanifestctx(self._manifestlog)
1411 1411 memmf._manifestdict = self.read().copy()
1412 1412 return memmf
1413 1413
1414 1414 def read(self):
1415 1415 return self._manifestdict
1416 1416
1417 1417 def write(self, transaction, link, p1, p2, added, removed):
1418 1418 return self._revlog().add(self._manifestdict, transaction, link, p1, p2,
1419 1419 added, removed)
1420 1420
1421 1421 class manifestctx(object):
1422 1422 """A class representing a single revision of a manifest, including its
1423 1423 contents, its parent revs, and its linkrev.
1424 1424 """
1425 1425 def __init__(self, manifestlog, node):
1426 1426 self._manifestlog = manifestlog
1427 1427 self._data = None
1428 1428
1429 1429 self._node = node
1430 1430
1431 1431 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1432 1432 # but let's add it later when something needs it and we can load it
1433 1433 # lazily.
1434 1434 #self.p1, self.p2 = revlog.parents(node)
1435 1435 #rev = revlog.rev(node)
1436 1436 #self.linkrev = revlog.linkrev(rev)
1437 1437
1438 1438 def _revlog(self):
1439 1439 return self._manifestlog._revlog
1440 1440
1441 1441 def node(self):
1442 1442 return self._node
1443 1443
1444 1444 def new(self):
1445 1445 return memmanifestctx(self._manifestlog)
1446 1446
1447 1447 def copy(self):
1448 1448 memmf = memmanifestctx(self._manifestlog)
1449 1449 memmf._manifestdict = self.read().copy()
1450 1450 return memmf
1451 1451
1452 1452 @propertycache
1453 1453 def parents(self):
1454 1454 return self._revlog().parents(self._node)
1455 1455
1456 1456 def read(self):
1457 1457 if self._data is None:
1458 1458 if self._node == revlog.nullid:
1459 1459 self._data = manifestdict()
1460 1460 else:
1461 1461 rl = self._revlog()
1462 1462 text = rl.revision(self._node)
1463 1463 arraytext = bytearray(text)
1464 1464 rl._fulltextcache[self._node] = arraytext
1465 1465 self._data = manifestdict(text)
1466 1466 return self._data
1467 1467
1468 1468 def readfast(self, shallow=False):
1469 1469 '''Calls either readdelta or read, based on which would be less work.
1470 1470 readdelta is called if the delta is against the p1, and therefore can be
1471 1471 read quickly.
1472 1472
1473 1473 If `shallow` is True, nothing changes since this is a flat manifest.
1474 1474 '''
1475 1475 rl = self._revlog()
1476 1476 r = rl.rev(self._node)
1477 1477 deltaparent = rl.deltaparent(r)
1478 1478 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1479 1479 return self.readdelta()
1480 1480 return self.read()
1481 1481
1482 1482 def readdelta(self, shallow=False):
1483 1483 '''Returns a manifest containing just the entries that are present
1484 1484 in this manifest, but not in its p1 manifest. This is efficient to read
1485 1485 if the revlog delta is already p1.
1486 1486
1487 1487 Changing the value of `shallow` has no effect on flat manifests.
1488 1488 '''
1489 1489 revlog = self._revlog()
1490 1490 if revlog._usemanifestv2:
1491 1491 # Need to perform a slow delta
1492 1492 r0 = revlog.deltaparent(revlog.rev(self._node))
1493 1493 m0 = self._manifestlog[revlog.node(r0)].read()
1494 1494 m1 = self.read()
1495 1495 md = manifestdict()
1496 1496 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1497 1497 if n1:
1498 1498 md[f] = n1
1499 1499 if fl1:
1500 1500 md.setflag(f, fl1)
1501 1501 return md
1502 1502
1503 1503 r = revlog.rev(self._node)
1504 1504 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1505 1505 return manifestdict(d)
1506 1506
1507 1507 def find(self, key):
1508 1508 return self.read().find(key)
1509 1509
1510 1510 class memtreemanifestctx(object):
1511 1511 def __init__(self, manifestlog, dir=''):
1512 1512 self._manifestlog = manifestlog
1513 1513 self._dir = dir
1514 1514 self._treemanifest = treemanifest()
1515 1515
1516 1516 def _revlog(self):
1517 1517 return self._manifestlog._revlog
1518 1518
1519 1519 def new(self, dir=''):
1520 1520 return memtreemanifestctx(self._manifestlog, dir=dir)
1521 1521
1522 1522 def copy(self):
1523 1523 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1524 1524 memmf._treemanifest = self._treemanifest.copy()
1525 1525 return memmf
1526 1526
1527 1527 def read(self):
1528 1528 return self._treemanifest
1529 1529
1530 1530 def write(self, transaction, link, p1, p2, added, removed):
1531 1531 def readtree(dir, node):
1532 1532 return self._manifestlog.get(dir, node).read()
1533 1533 return self._revlog().add(self._treemanifest, transaction, link, p1, p2,
1534 1534 added, removed, readtree=readtree)
1535 1535
1536 1536 class treemanifestctx(object):
1537 1537 def __init__(self, manifestlog, dir, node):
1538 1538 self._manifestlog = manifestlog
1539 1539 self._dir = dir
1540 1540 self._data = None
1541 1541
1542 1542 self._node = node
1543 1543
1544 1544 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1545 1545 # we can instantiate treemanifestctx objects for directories we don't
1546 1546 # have on disk.
1547 1547 #self.p1, self.p2 = revlog.parents(node)
1548 1548 #rev = revlog.rev(node)
1549 1549 #self.linkrev = revlog.linkrev(rev)
1550 1550
1551 1551 def _revlog(self):
1552 1552 return self._manifestlog._revlog.dirlog(self._dir)
1553 1553
1554 1554 def read(self):
1555 1555 if self._data is None:
1556 1556 rl = self._revlog()
1557 1557 if self._node == revlog.nullid:
1558 1558 self._data = treemanifest()
1559 1559 elif rl._treeondisk:
1560 1560 m = treemanifest(dir=self._dir)
1561 1561 def gettext():
1562 1562 return rl.revision(self._node)
1563 1563 def readsubtree(dir, subm):
1564 1564 # Set verify to False since we need to be able to create
1565 1565 # subtrees for trees that don't exist on disk.
1566 1566 return self._manifestlog.get(dir, subm, verify=False).read()
1567 1567 m.read(gettext, readsubtree)
1568 1568 m.setnode(self._node)
1569 1569 self._data = m
1570 1570 else:
1571 1571 text = rl.revision(self._node)
1572 1572 arraytext = bytearray(text)
1573 1573 rl.fulltextcache[self._node] = arraytext
1574 1574 self._data = treemanifest(dir=self._dir, text=text)
1575 1575
1576 1576 return self._data
1577 1577
1578 1578 def node(self):
1579 1579 return self._node
1580 1580
1581 1581 def new(self, dir=''):
1582 1582 return memtreemanifestctx(self._manifestlog, dir=dir)
1583 1583
1584 1584 def copy(self):
1585 1585 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1586 1586 memmf._treemanifest = self.read().copy()
1587 1587 return memmf
1588 1588
1589 1589 @propertycache
1590 1590 def parents(self):
1591 1591 return self._revlog().parents(self._node)
1592 1592
1593 1593 def readdelta(self, shallow=False):
1594 1594 '''Returns a manifest containing just the entries that are present
1595 1595 in this manifest, but not in its p1 manifest. This is efficient to read
1596 1596 if the revlog delta is already p1.
1597 1597
1598 1598 If `shallow` is True, this will read the delta for this directory,
1599 1599 without recursively reading subdirectory manifests. Instead, any
1600 1600 subdirectory entry will be reported as it appears in the manifest, i.e.
1601 1601 the subdirectory will be reported among files and distinguished only by
1602 1602 its 't' flag.
1603 1603 '''
1604 1604 revlog = self._revlog()
1605 1605 if shallow and not revlog._usemanifestv2:
1606 1606 r = revlog.rev(self._node)
1607 1607 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1608 1608 return manifestdict(d)
1609 1609 else:
1610 1610 # Need to perform a slow delta
1611 1611 r0 = revlog.deltaparent(revlog.rev(self._node))
1612 1612 m0 = self._manifestlog.get(self._dir, revlog.node(r0)).read()
1613 1613 m1 = self.read()
1614 1614 md = treemanifest(dir=self._dir)
1615 1615 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1616 1616 if n1:
1617 1617 md[f] = n1
1618 1618 if fl1:
1619 1619 md.setflag(f, fl1)
1620 1620 return md
1621 1621
1622 1622 def readfast(self, shallow=False):
1623 1623 '''Calls either readdelta or read, based on which would be less work.
1624 1624 readdelta is called if the delta is against the p1, and therefore can be
1625 1625 read quickly.
1626 1626
1627 1627 If `shallow` is True, it only returns the entries from this manifest,
1628 1628 and not any submanifests.
1629 1629 '''
1630 1630 rl = self._revlog()
1631 1631 r = rl.rev(self._node)
1632 1632 deltaparent = rl.deltaparent(r)
1633 1633 if (deltaparent != revlog.nullrev and
1634 1634 deltaparent in rl.parentrevs(r)):
1635 1635 return self.readdelta(shallow=shallow)
1636 1636
1637 1637 if shallow:
1638 1638 return manifestdict(rl.revision(self._node))
1639 1639 else:
1640 1640 return self.read()
1641 1641
1642 1642 def find(self, key):
1643 1643 return self.read().find(key)
General Comments 0
You need to be logged in to leave comments. Login now