manifest: drop the CamelCase name for `manifest.manifestlog`...
Matt Harbison
r52963:6641a3ae default
@@ -1,2780 +1,2774 @@
# manifest.py - manifest revision class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import annotations

import heapq
import itertools
import struct
import typing
import weakref

from typing import (
    ByteString,
    Callable,
    Collection,
    Dict,
    Iterable,
    Iterator,
    List,
    Optional,
    Set,
    Tuple,
    Union,
    cast,
)

from .i18n import _
from .node import (
    bin,
    hex,
    nullrev,
)
from . import (
    encoding,
    error,
    match as matchmod,
    mdiff,
    pathutil,
    policy,
    pycompat,
    revlog,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    constants as revlog_constants,
)

parsers = policy.importmod('parsers')
propertycache = util.propertycache

# Allow tests to more easily test the alternate path in manifestdict.fastdelta()
FASTDELTA_TEXTDIFF_THRESHOLD = 1000


def _parse(nodelen, data: bytes):
    # This method does a little bit of excessive-looking
    # precondition checking. This is so that the behavior of this
    # class exactly matches its C counterpart to try and help
    # prevent surprise breakage for anyone that develops against
    # the pure version.
    if data and data[-1:] != b'\n':
        raise ValueError(b'Manifest did not end in a newline.')
    prev = None
    for l in data.splitlines():
        if prev is not None and prev > l:
            raise ValueError(b'Manifest lines not in sorted order.')
        prev = l
        f, n = l.split(b'\0')
        nl = len(n)
        flags = n[-1:]
        if flags in _manifestflags:
            n = n[:-1]
            nl -= 1
        else:
            flags = b''
        if nl != 2 * nodelen:
            raise ValueError(b'Invalid manifest line')

        yield f, bin(n), flags


def _text(it):
    files = []
    lines = []
    for f, n, fl in it:
        files.append(f)
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))

    _checkforbidden(files)
    return b''.join(lines)
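

# Illustrative sketch, not part of the original module: how one manifest
# entry round-trips through _parse()/_text(). Each line of the format is
# ``<filename>\0<hex node>[flag]\n``, sorted by filename. The filename,
# node value, and flag below are hypothetical.
def _example_parse_text_roundtrip():  # pragma: no cover - documentation only
    node = b'\xaa' * 20  # hypothetical sha1-length node
    data = b'foo/bar.txt\x00' + hex(node) + b'x\n'
    entries = list(_parse(20, data))  # nodelen=20 assumes sha1 hashing
    assert entries == [(b'foo/bar.txt', node, b'x')]
    assert _text(iter(entries)) == data  # serializing restores the text

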
class lazymanifestiter:
    def __init__(self, lm: '_LazyManifest') -> None:
        self.pos = 0
        self.lm = lm

    def __iter__(self) -> 'lazymanifestiter':
        return self

    def next(self) -> bytes:
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        if pos == -1:
            assert isinstance(data, tuple)
            self.pos += 1
            return data[0]
        assert isinstance(data, bytes)
        self.pos += 1
        zeropos = data.find(b'\x00', pos)
        return data[pos:zeropos]

    __next__ = next


class lazymanifestiterentries:
    def __init__(self, lm: '_LazyManifest') -> None:
        self.lm = lm
        self.pos = 0

    def __iter__(self) -> 'lazymanifestiterentries':
        return self

    def next(self) -> Tuple[bytes, bytes, bytes]:
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        if pos == -1:
            assert isinstance(data, tuple)
            self.pos += 1
            return data
        assert isinstance(data, bytes)
        zeropos = data.find(b'\x00', pos)
        nlpos = data.find(b'\n', pos)
        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
            raise error.StorageError(b'Invalid manifest line')
        flags = data[nlpos - 1 : nlpos]
        if flags in _manifestflags:
            hlen = nlpos - zeropos - 2
        else:
            hlen = nlpos - zeropos - 1
            flags = b''
        if hlen != 2 * self.lm._nodelen:
            raise error.StorageError(b'Invalid manifest line')
        hashval = unhexlify(
            data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
        )
        self.pos += 1
        return (data[pos:zeropos], hashval, flags)

    __next__ = next


def unhexlify(data: bytes, extra: int, pos, length: int):
    s = bin(data[pos : pos + length])
    if extra:
        s += bytes([extra & 0xFF])
    return s


def _cmp(a, b):
    return (a > b) - (a < b)


_manifestflags = {b'', b'l', b't', b'x'}
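

# The one-byte entry flags above, summarized here for illustration (this
# note is not part of the original module):
#   b'x' - executable file
#   b'l' - symlink
#   b't' - tree (sub-)manifest entry, used by tree manifests
# A regular file carries no flag byte, which parses as b''.
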
class _LazyManifest:
    """A pure python manifest backed by a byte string. It is supplemented with
    internal lists as it is modified, until it is compacted back to a pure byte
    string.

    ``data`` is the initial manifest data.

    ``positions`` is a list of offsets, one per manifest entry. Positive
    values are offsets into ``data``, negative values are offsets into the
    ``extradata`` list. When an entry is removed, its entry is dropped from
    ``positions``. The values are encoded such that when walking the list and
    indexing into ``data`` or ``extradata`` as appropriate, the entries are
    sorted by filename.

    ``extradata`` is a list of (key, hash, flags) for entries that were added or
    modified since the manifest was created or compacted.
    """

    def __init__(
        self,
        nodelen: int,
        data: bytes,
        positions=None,
        extrainfo=None,
        extradata=None,
        hasremovals: bool = False,
    ):
        self._nodelen = nodelen
        if positions is None:
            self.positions = self.findlines(data)
            self.extrainfo = [0] * len(self.positions)
            self.data = data
            self.extradata = []
            self.hasremovals = False
        else:
            self.positions = positions[:]
            self.extrainfo = extrainfo[:]
            self.extradata = extradata[:]
            self.data = data
            self.hasremovals = hasremovals

    def findlines(self, data: bytes) -> List[int]:
        if not data:
            return []
        pos = data.find(b"\n")
        if pos == -1 or data[-1:] != b'\n':
            raise ValueError(b"Manifest did not end in a newline.")
        positions = [0]
        prev = data[: data.find(b'\x00')]
        while pos < len(data) - 1 and pos != -1:
            positions.append(pos + 1)
            nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
            if nexts < prev:
                raise ValueError(b"Manifest lines not in sorted order.")
            prev = nexts
            pos = data.find(b"\n", pos + 1)
        return positions

    def _get(
        self, index: int
    ) -> Tuple[Union[bytes, Tuple[bytes, bytes, bytes]], int]:
        # get the position encoded in pos:
        # positive number is an index in 'data'
        # negative number is in extrapieces
        pos = self.positions[index]
        if pos >= 0:
            return self.data, pos
        return self.extradata[-pos - 1], -1

    def _getkey(self, pos) -> bytes:
        if pos >= 0:
            return self.data[pos : self.data.find(b'\x00', pos + 1)]
        return self.extradata[-pos - 1][0]

    def bsearch(self, key: bytes) -> int:
        first = 0
        last = len(self.positions) - 1

        while first <= last:
            midpoint = (first + last) // 2
            nextpos = self.positions[midpoint]
            candidate = self._getkey(nextpos)
            r = _cmp(key, candidate)
            if r == 0:
                return midpoint
            else:
                if r < 0:
                    last = midpoint - 1
                else:
                    first = midpoint + 1
        return -1

    def bsearch2(self, key: bytes) -> Tuple[int, bool]:
        # same as the above, but will always return the position
        # done for performance reasons
        first = 0
        last = len(self.positions) - 1

        while first <= last:
            midpoint = (first + last) // 2
            nextpos = self.positions[midpoint]
            candidate = self._getkey(nextpos)
            r = _cmp(key, candidate)
            if r == 0:
                return (midpoint, True)
            else:
                if r < 0:
                    last = midpoint - 1
                else:
                    first = midpoint + 1
        return (first, False)

    def __contains__(self, key: bytes) -> bool:
        return self.bsearch(key) != -1

    def __getitem__(self, key: bytes) -> Tuple[bytes, bytes]:
        if not isinstance(key, bytes):
            raise TypeError(b"getitem: manifest keys must be a bytes.")
        needle = self.bsearch(key)
        if needle == -1:
            raise KeyError
        data, pos = self._get(needle)
        if pos == -1:
            assert isinstance(data, tuple)
            return (data[1], data[2])

        assert isinstance(data, bytes)
        zeropos = data.find(b'\x00', pos)
        nlpos = data.find(b'\n', zeropos)
        assert 0 <= needle <= len(self.positions)
        assert len(self.extrainfo) == len(self.positions)
        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
            raise error.StorageError(b'Invalid manifest line')
        hlen = nlpos - zeropos - 1
        flags = data[nlpos - 1 : nlpos]
        if flags in _manifestflags:
            hlen -= 1
        else:
            flags = b''
        if hlen != 2 * self._nodelen:
            raise error.StorageError(b'Invalid manifest line')
        hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
        return (hashval, flags)

    def __delitem__(self, key: bytes) -> None:
        needle, found = self.bsearch2(key)
        if not found:
            raise KeyError
        cur = self.positions[needle]
        self.positions = self.positions[:needle] + self.positions[needle + 1 :]
        self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
        if cur >= 0:
            # This does NOT unsort the list as far as the search functions are
            # concerned, as they only examine lines mapped by self.positions.
            self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
            self.hasremovals = True

    def __setitem__(self, key: bytes, value: Tuple[bytes, bytes]):
        if not isinstance(key, bytes):
            raise TypeError(b"setitem: manifest keys must be a byte string.")
        if not isinstance(value, tuple) or len(value) != 2:
            raise TypeError(
                b"Manifest values must be a tuple of (node, flags)."
            )
        hashval = value[0]
        if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
            raise TypeError(b"node must be a 20-byte or 32-byte byte string")
        flags = value[1]
        if not isinstance(flags, bytes) or len(flags) > 1:
            raise TypeError(
                b"flags must be a 0 or 1 byte string, got %r" % (flags,)
            )
        needle, found = self.bsearch2(key)
        if found:
            # put the item
            pos = self.positions[needle]
            if pos < 0:
                self.extradata[-pos - 1] = (key, hashval, value[1])
            else:
                # just don't bother
                self.extradata.append((key, hashval, value[1]))
                self.positions[needle] = -len(self.extradata)
        else:
            # not found, put it in with extra positions
            self.extradata.append((key, hashval, value[1]))
            self.positions = (
                self.positions[:needle]
                + [-len(self.extradata)]
                + self.positions[needle:]
            )
            self.extrainfo = (
                self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
            )

    def copy(self) -> '_LazyManifest':
        # XXX call _compact like in C?
        return _lazymanifest(
            self._nodelen,
            self.data,
            self.positions,
            self.extrainfo,
            self.extradata,
            self.hasremovals,
        )

    def _compact(self) -> None:
        # hopefully not called TOO often
        if len(self.extradata) == 0 and not self.hasremovals:
            return
        l = []
        i = 0
        offset = 0
        self.extrainfo = [0] * len(self.positions)
        while i < len(self.positions):
            if self.positions[i] >= 0:
                cur = self.positions[i]
                last_cut = cur

                # Collect all contiguous entries in the buffer at the current
                # offset, breaking out only for added/modified items held in
                # extradata, or a deleted line prior to the next position.
                while True:
                    self.positions[i] = offset
                    i += 1
                    if i == len(self.positions) or self.positions[i] < 0:
                        break

                    # A removed file has no positions[] entry, but does have an
                    # overwritten first byte. Break out and find the end of the
                    # current good entry/entries if there is a removed file
                    # before the next position.
                    if (
                        self.hasremovals
                        and self.data.find(b'\n\x00', cur, self.positions[i])
                        != -1
                    ):
                        break

                    offset += self.positions[i] - cur
                    cur = self.positions[i]
                end_cut = self.data.find(b'\n', cur)
                if end_cut != -1:
                    end_cut += 1
                offset += end_cut - cur
                l.append(self.data[last_cut:end_cut])
            else:
                while i < len(self.positions) and self.positions[i] < 0:
                    cur = self.positions[i]
                    t = self.extradata[-cur - 1]
                    l.append(self._pack(t))
                    self.positions[i] = offset
                    # Hashes are either 20 bytes (old sha1s) or 32
                    # bytes (new non-sha1).
                    hlen = 20
                    if len(t[1]) > 25:
                        hlen = 32
                    if len(t[1]) > hlen:
                        self.extrainfo[i] = ord(t[1][hlen + 1])
                    offset += len(l[-1])
                    i += 1
        self.data = b''.join(l)
        self.hasremovals = False
        self.extradata = []

    def _pack(self, d: Tuple[bytes, bytes, bytes]) -> bytes:
        n = d[1]
        assert len(n) in (20, 32)
        return d[0] + b'\x00' + hex(n) + d[2] + b'\n'

    def text(self) -> ByteString:
        self._compact()
        return self.data

    def diff(
        self, m2: '_LazyManifest', clean: bool = False
    ) -> Dict[
        bytes,
        Optional[
            Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
        ],
    ]:
        '''Finds changes between the current manifest and m2.'''
        # XXX think whether efficiency matters here
        diff = {}

        for fn, e1, flags in self.iterentries():
            if fn not in m2:
                diff[fn] = (e1, flags), (None, b'')
            else:
                e2 = m2[fn]
                if (e1, flags) != e2:
                    diff[fn] = (e1, flags), e2
                elif clean:
                    diff[fn] = None

        for fn, e2, flags in m2.iterentries():
            if fn not in self:
                diff[fn] = (None, b''), (e2, flags)

        return diff

    def iterentries(self) -> lazymanifestiterentries:
        return lazymanifestiterentries(self)

    def iterkeys(self) -> lazymanifestiter:
        return lazymanifestiter(self)

    def __iter__(self) -> lazymanifestiter:
        return lazymanifestiter(self)

    def __len__(self) -> int:
        return len(self.positions)

    def filtercopy(self, filterfn: Callable[[bytes], bool]) -> '_LazyManifest':
        # XXX should be optimized
        c = _lazymanifest(self._nodelen, b'')
        for f, n, fl in self.iterentries():
            if filterfn(f):
                c[f] = n, fl
        return c
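

# Illustrative sketch, not part of the original module: basic use of the
# pure-python _LazyManifest. The filenames and node value are hypothetical.
def _example_lazymanifest():  # pragma: no cover - documentation only
    node = b'\x11' * 20
    lm = _LazyManifest(20, b'a.txt\x00' + hex(node) + b'\n')
    assert b'a.txt' in lm  # binary search over ``positions``
    assert lm[b'a.txt'] == (node, b'')
    lm[b'b.txt'] = (node, b'x')  # buffered in ``extradata``
    expected = (  # text() compacts everything back into one byte string
        b'a.txt\x00' + hex(node) + b'\n' + b'b.txt\x00' + hex(node) + b'x\n'
    )
    assert lm.text() == expected

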
try:
    _lazymanifest = parsers.lazymanifest
except AttributeError:
    _lazymanifest = _LazyManifest


class manifestdict:  # (repository.imanifestdict)
    def __init__(self, nodelen: int, data: ByteString = b''):
        self._nodelen = nodelen
        self._lm = _lazymanifest(nodelen, data)

    def __getitem__(self, key: bytes) -> bytes:
        return self._lm[key][0]

    def find(self, key: bytes) -> Tuple[bytes, bytes]:
        return self._lm[key]

    def __len__(self) -> int:
        return len(self._lm)

    def __nonzero__(self) -> bool:
        # nonzero is covered by the __len__ function, but implementing it here
        # makes it easier for extensions to override.
        return len(self._lm) != 0

    __bool__ = __nonzero__

    def set(self, key: bytes, node: bytes, flags: bytes) -> None:
        self._lm[key] = node, flags

    def __setitem__(self, key: bytes, node: bytes) -> None:
        self._lm[key] = node, self.flags(key)

    def __contains__(self, key: bytes) -> bool:
        if key is None:
            return False
        return key in self._lm

    def __delitem__(self, key: bytes) -> None:
        del self._lm[key]

    def __iter__(self) -> Iterator[bytes]:
        return self._lm.__iter__()

    def iterkeys(self) -> Iterator[bytes]:
        return self._lm.iterkeys()

    def keys(self) -> List[bytes]:
        return list(self.iterkeys())

    def filesnotin(self, m2, match=None) -> Set[bytes]:
        '''Set of files in this manifest that are not in the other'''
        if match is not None:
            match = matchmod.badmatch(match, lambda path, msg: None)
            sm2 = set(m2.walk(match))
            return {f for f in self.walk(match) if f not in sm2}
        return {f for f in self if f not in m2}

    @propertycache
    def _dirs(self) -> pathutil.dirs:
        return pathutil.dirs(self)

    def dirs(self) -> pathutil.dirs:
        return self._dirs

    def hasdir(self, dir: bytes) -> bool:
        return dir in self._dirs

    def _filesfastpath(self, match: matchmod.basematcher) -> bool:
        """Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files."""
        files = match.files()
        return len(files) < 100 and (
            match.isexact()
            or (match.prefix() and all(fn in self for fn in files))
        )

    def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
        """Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                if fn in self:
                    yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _matches(self, match: matchmod.basematcher) -> 'manifestdict':
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if self._filesfastpath(match):
            m = manifestdict(self._nodelen)
            lm = self._lm
            for fn in match.files():
                if fn in lm:
                    m._lm[fn] = lm[fn]
            return m

        m = manifestdict(self._nodelen)
        m._lm = self._lm.filtercopy(match)
        return m

    def diff(
        self,
        m2: 'manifestdict',
        match: Optional[matchmod.basematcher] = None,
        clean: bool = False,
    ) -> Dict[
        bytes,
        Optional[
            Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
        ],
    ]:
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match:
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        return self._lm.diff(m2._lm, clean)
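
    # Illustrative sketch, not part of the original module: the shape of a
    # diff() result, with hypothetical nodes n1/n2 for a file changed
    # between m1 and m2 and a file present only in m2:
    #   m1.diff(m2)[b'changed'] == ((n1, b''), (n2, b''))
    #   m1.diff(m2)[b'm2-only'] == ((None, b''), (n2, b''))
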
    def setflag(self, key: bytes, flag: bytes) -> None:
        if flag not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._lm[key] = self[key], flag

    def get(self, key: bytes, default=None) -> Optional[bytes]:
        try:
            return self._lm[key][0]
        except KeyError:
            return default

    def flags(self, key: bytes) -> bytes:
        try:
            return self._lm[key][1]
        except KeyError:
            return b''

    def copy(self) -> 'manifestdict':
        c = manifestdict(self._nodelen)
        c._lm = self._lm.copy()
        return c

    def items(self) -> Iterator[Tuple[bytes, bytes]]:
        return (x[:2] for x in self._lm.iterentries())

    def iteritems(self) -> Iterator[Tuple[bytes, bytes]]:
        return (x[:2] for x in self._lm.iterentries())

    def iterentries(self) -> Iterator[Tuple[bytes, bytes, bytes]]:
        return self._lm.iterentries()

    def text(self) -> ByteString:
        # most likely uses native version
        return self._lm.text()

    def fastdelta(
        self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
    ) -> Tuple[ByteString, ByteString]:
        """Given a base manifest text as a bytearray and a list of changes
        relative to that text, compute a delta that can be used by revlog.
        """
        delta = []
        dstart = None
        dend = None
        dline = [b""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # bs will either be the index of the item or the insert point
                start, end = _msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._lm[f]
                    l = b"%s\0%s%s\n" % (f, hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        raise AssertionError(
                            _(b"failed to remove %s from manifest") % f
                        )
                    l = b""
                if dstart is not None and dstart <= start and dend >= start:
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    if dstart is not None:
                        delta.append((dstart, dend, b"".join(dline)))
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append((dstart, dend, b"".join(dline)))
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = _addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = bytearray(self.text())
            deltatext = mdiff.textdiff(
                util.buffer(base), util.buffer(arraytext)
            )

        return arraytext, deltatext
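

# Illustrative sketch, not part of the original module: computing a revlog
# delta with manifestdict.fastdelta(). The node value is hypothetical and
# the offsets assume 20-byte (sha1) nodes, i.e. 43-byte manifest lines.
def _example_fastdelta():  # pragma: no cover - documentation only
    node = b'\x11' * 20
    m = manifestdict(20)
    m[b'a'] = node
    m[b'b'] = node
    base = bytearray(b'a\x00' + hex(node) + b'\n')  # old text, lacking b'b'
    arraytext, deltatext = m.fastdelta(base, [(b'b', False)])
    assert bytes(arraytext) == m.text()  # the full updated text
    start, end, length = struct.unpack(b'>lll', deltatext[:12])
    assert (start, end) == (43, 43)  # a pure insertion at the end of base

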
def _msearch(
    m: ByteString, s: bytes, lo: int = 0, hi: Optional[int] = None
) -> Tuple[int, int]:
    """return a tuple (start, end) that says where to find s within m.

    If the string is found, m[start:end] is the line containing
    that string. If start == end the string was not found and
    they indicate the proper sorted insertion point.
    """

    def advance(i: int, c: bytes):
        while i < lenm and m[i : i + 1] != c:
            i += 1
        return i

    if not s:
        return (lo, lo)
    lenm = len(m)
    if not hi:
        hi = lenm
    while lo < hi:
        mid = (lo + hi) // 2
        start = mid
        while start > 0 and m[start - 1 : start] != b'\n':
            start -= 1
        end = advance(start, b'\0')
        if bytes(m[start:end]) < s:
            # we know that after the null there are 40 bytes of sha1
            # this translates to the bisect lo = mid + 1
            lo = advance(end + 40, b'\n') + 1
        else:
            # this translates to the bisect hi = mid
            hi = start
    end = advance(lo, b'\0')
    found = m[lo:end]
    if s == found:
        # we know that after the null there are 40 bytes of sha1
        end = advance(end + 40, b'\n')
        return (lo, end + 1)
    else:
        return (lo, lo)
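

# Illustrative sketch, not part of the original module: locating a manifest
# line with _msearch(). The hex digits stand in for hypothetical sha1 nodes.
def _example_msearch():  # pragma: no cover - documentation only
    line_a = b'a\x00' + b'11' * 20 + b'\n'
    line_c = b'c\x00' + b'22' * 20 + b'\n'
    text = line_a + line_c
    start, end = _msearch(text, b'a')
    assert text[start:end] == line_a  # found: the whole matching line
    lo, hi = _msearch(text, b'b')
    assert lo == hi == len(line_a)  # missing: the sorted insertion point

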
def _checkforbidden(l: Iterable[bytes]) -> None:
    """Check filenames for illegal characters."""
    for f in l:
        if b'\n' in f or b'\r' in f:
            raise error.StorageError(
                _(b"'\\n' and '\\r' disallowed in filenames: %r")
                % pycompat.bytestr(f)
            )
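

# Illustrative note, not part of the original module:
#   _checkforbidden([b'ok.txt'])     # returns silently
#   _checkforbidden([b'bad\nname'])  # raises error.StorageError
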
# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(
    addlist: ByteString,
    x: Iterable[Tuple[int, int, bytes]],
) -> Tuple[bytes, ByteString]:
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    currentposition = 0
    newaddlist = bytearray()

    for start, end, content in x:
        newaddlist += addlist[currentposition:start]
        if content:
            newaddlist += bytearray(content)

        currentposition = end

    newaddlist += addlist[currentposition:]

    deltatext = b"".join(
        struct.pack(b">lll", start, end, len(content)) + content
        for start, end, content in x
    )
    return deltatext, newaddlist
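

# Illustrative sketch, not part of the original module: decoding the delta
# produced by _addlistdelta(). Each chunk is a 12-byte ">lll" header of
# (start, end, content length) followed by that many bytes of content.
def _example_decode_delta(deltatext: bytes):  # pragma: no cover
    pos = 0
    while pos < len(deltatext):
        start, end, length = struct.unpack(b'>lll', deltatext[pos : pos + 12])
        yield start, end, deltatext[pos + 12 : pos + 12 + length]
        pos += 12 + length

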
def _splittopdir(f: bytes) -> Tuple[bytes, bytes]:
    if b'/' in f:
        dir, subpath = f.split(b'/', 1)
        return dir + b'/', subpath
    else:
        return b'', f
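

# Illustrative note, not part of the original module: _splittopdir() peels
# off one leading path component, keeping its trailing '/':
#   _splittopdir(b'a/b/c') == (b'a/', b'b/c')
#   _splittopdir(b'top')   == (b'', b'top')
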
_noop = lambda s: None


class treemanifest:  # (repository.imanifestdict)
    _dir: bytes
    _dirs: Dict[bytes, 'treemanifest']
    _dirty: bool
    _files: Dict[bytes, bytes]
    _flags: Dict[bytes, bytes]

    def __init__(self, nodeconstants, dir: bytes = b'', text: bytes = b''):
        self._dir = dir
        self.nodeconstants = nodeconstants
        self._node = self.nodeconstants.nullid
        self._nodelen = self.nodeconstants.nodelen
        self._loadfunc = _noop
        self._copyfunc = _noop
        self._dirty = False
        self._dirs = {}
        self._lazydirs: Dict[
            bytes,
            Tuple[bytes, Callable[[bytes, bytes], 'treemanifest'], bool],
        ] = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        if text:

            def readsubtree(subdir, subm):
                raise AssertionError(
                    b'treemanifest constructor only accepts flat manifests'
                )

            self.parse(text, readsubtree)
            self._dirty = True  # Mark flat manifest dirty after parsing

    def _subpath(self, path: bytes) -> bytes:
        return self._dir + path

    def _loadalllazy(self) -> None:
        selfdirs = self._dirs
        subpath = self._subpath
        for d, (node, readsubtree, docopy) in self._lazydirs.items():
            if docopy:
                selfdirs[d] = readsubtree(subpath(d), node).copy()
            else:
                selfdirs[d] = readsubtree(subpath(d), node)
        self._lazydirs.clear()

    def _loadlazy(self, d: bytes) -> None:
        v = self._lazydirs.get(d)
        if v is not None:
            node, readsubtree, docopy = v
            if docopy:
                self._dirs[d] = readsubtree(self._subpath(d), node).copy()
            else:
                self._dirs[d] = readsubtree(self._subpath(d), node)
            del self._lazydirs[d]

    def _loadchildrensetlazy(
        self, visit: Union[Set[bytes], bytes]
    ) -> Optional[Set[bytes]]:
        if not visit:
            return None
        if visit == b'all' or visit == b'this':
            self._loadalllazy()
            return None

        visit = cast(Set[bytes], visit)

        loadlazy = self._loadlazy
        for k in visit:
            loadlazy(k + b'/')
        return visit

    def _loaddifflazy(self, t1: 'treemanifest', t2: 'treemanifest'):
        """load items in t1 and t2 if they're needed for diffing.

        The current criteria are:
        - if it's not present in _lazydirs in either t1 or t2, load it in the
          other (it may already be loaded or it may not exist, doesn't matter)
        - if it's present in _lazydirs in both, compare the nodeid; if it
          differs, load it in both
        """
        toloadlazy = []
        for d, v1 in t1._lazydirs.items():
            v2 = t2._lazydirs.get(d)
            if v2 is None or v2[0] != v1[0]:
                toloadlazy.append(d)
        for d, v1 in t2._lazydirs.items():
            if d not in t1._lazydirs:
                toloadlazy.append(d)

        for d in toloadlazy:
            t1._loadlazy(d)
            t2._loadlazy(d)
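
    # Illustrative note, not part of the original module: ``_lazydirs`` maps
    # a subdirectory name (with trailing '/') to (node, readsubtree, docopy);
    # a subtree is only materialized into ``_dirs`` when a lookup, diff, or
    # iteration needs it, which is what the _load*lazy helpers above handle.
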
941 def __len__(self) -> int:
    def __len__(self) -> int:
        self._load()
        size = len(self._files)
        self._loadalllazy()
        for m in self._dirs.values():
            size += m.__len__()
        return size

    def __nonzero__(self) -> bool:
        # Faster than "__len__() != 0" since it avoids loading sub-manifests
        return not self._isempty()

    __bool__ = __nonzero__

    def _isempty(self) -> bool:
        self._load()  # for consistency; already loaded by all callers
        # See if we can skip loading everything.
        if self._files or (
            self._dirs and any(not m._isempty() for m in self._dirs.values())
        ):
            return False
        self._loadalllazy()
        return not self._dirs or all(m._isempty() for m in self._dirs.values())

    @encoding.strmethod
    def __repr__(self) -> bytes:
        return (
            b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
            % (
                self._dir,
                hex(self._node),
                bool(self._loadfunc is _noop),
                self._dirty,
                id(self),
            )
        )

    def dir(self) -> bytes:
        """The directory that this tree manifest represents, including a
        trailing '/'. Empty string for the repo root directory."""
        return self._dir

    def node(self) -> bytes:
        """The node of this instance. nullid for unsaved instances. Should
        be updated when the instance is read or written from a revlog.
        """
        assert not self._dirty
        return self._node

    def setnode(self, node: bytes) -> None:
        self._node = node
        self._dirty = False

    def iterentries(
        self,
    ) -> Iterator[Tuple[bytes, Union[bytes, 'treemanifest'], bytes]]:
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n, self._flags.get(p, b'')
            else:
                for x in n.iterentries():
                    yield x

    def items(self) -> Iterator[Tuple[bytes, Union[bytes, 'treemanifest']]]:
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n
            else:
                for f, sn in n.items():
                    yield f, sn

    iteritems = items

    def iterkeys(self) -> Iterator[bytes]:
        self._load()
        self._loadalllazy()
        for p in sorted(itertools.chain(self._dirs, self._files)):
            if p in self._files:
                yield self._subpath(p)
            else:
                for f in self._dirs[p]:
                    yield f

    def keys(self) -> List[bytes]:
        return list(self.iterkeys())

    def __iter__(self) -> Iterator[bytes]:
        return self.iterkeys()

    def __contains__(self, f: bytes) -> bool:
        if f is None:
            return False
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return False

            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f: bytes, default: Optional[bytes] = None) -> Optional[bytes]:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f: bytes) -> bytes:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]
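
    # Illustrative sketch (not part of the original module): lookups split
    # the path at its top-level directory via the module's ``_splittopdir``
    # helper and recurse into the matching sub-manifest, e.g.:
    #
    #   >>> _splittopdir(b'a/b/c.txt')
    #   (b'a/', b'b/c.txt')
    #   >>> _splittopdir(b'top.txt')
    #   (b'', b'top.txt')
    #
    # so ``m[b'a/b/c.txt']`` becomes ``m._dirs[b'a/'][b'b/c.txt']`` once the
    # lazy subtree for ``a/`` has been loaded.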

    def flags(self, f: bytes) -> bytes:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return b''
            return self._dirs[dir].flags(subpath)
        else:
            if f in self._lazydirs or f in self._dirs:
                return b''
            return self._flags.get(f, b'')

    def find(self, f: bytes) -> Tuple[bytes, bytes]:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, b'')

    def __delitem__(self, f: bytes) -> None:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]
        self._dirty = True

    def set(self, f: bytes, node: bytes, flags: bytes) -> None:
        """Set both the node and the flags for path f."""
        assert node is not None
        if flags not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].set(subpath, node, flags)
        else:
            assert len(node) in (20, 32)
            self._files[f] = node
            self._flags[f] = flags
        self._dirty = True
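
    # Illustrative usage sketch (the ``nodeconstants`` value and the 20-byte
    # node are assumptions, not from the original module): setting a nested
    # path creates the intermediate sub-manifests on demand and marks the
    # tree dirty.
    #
    #   >>> m = treemanifest(nodeconstants)
    #   >>> m.set(b'a/b/c.py', b'\x11' * 20, b'x')  # 'x' = executable flag
    #   >>> m.flags(b'a/b/c.py')
    #   b'x'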

    def __setitem__(self, f: bytes, n: bytes) -> None:
        assert n is not None
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].__setitem__(subpath, n)
        else:
            # manifest nodes are either 20 bytes or 32 bytes,
            # depending on the hash in use. Assert this as historically
            # sometimes extra bytes were added.
            assert len(n) in (20, 32)
            self._files[f] = n
        self._dirty = True

    def _load(self) -> None:
        if self._loadfunc is not _noop:
            lf, self._loadfunc = self._loadfunc, _noop
            lf(self)
        elif self._copyfunc is not _noop:
            cf, self._copyfunc = self._copyfunc, _noop
            cf(self)

    def setflag(self, f: bytes, flags: bytes) -> None:
        """Set the flags (symlink, executable) for path f."""
        if flags not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags
        self._dirty = True

    def copy(self) -> 'treemanifest':
        copy = treemanifest(self.nodeconstants, self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:

            def _copyfunc(s):
                self._load()
                s._lazydirs = {
                    d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
                }
                sdirs = s._dirs
                for d, v in self._dirs.items():
                    sdirs[d] = v.copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)

            if self._loadfunc is _noop:
                _copyfunc(copy)
            else:
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy

    def filesnotin(
        self, m2: 'treemanifest', match: Optional[matchmod.basematcher] = None
    ) -> Set[bytes]:
        '''Set of files in this manifest that are not in the other'''
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.filesnotin(m2)

        files = set()

        def _filesnotin(t1, t2):
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)
            for d, m1 in t1._dirs.items():
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    files.update(m1.iterkeys())

            for fn in t1._files:
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self) -> pathutil.dirs:
        return pathutil.dirs(self)

    def dirs(self) -> pathutil.dirs:
        return self._alldirs

    def hasdir(self, dir: bytes) -> bool:
        self._load()
        topdir, subdir = _splittopdir(dir)
        if topdir:
            self._loadlazy(topdir)
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
        dirslash = dir + b'/'
        return dirslash in self._dirs or dirslash in self._lazydirs
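
    # Illustrative sketch (not part of the original module): ``hasdir``
    # recurses like the item lookups above, but the final component is
    # checked with a trailing slash, since directory keys are stored as
    # ``b'name/'``.
    #
    #   >>> m.set(b'a/b/c.py', b'\x11' * 20, b'')
    #   >>> m.hasdir(b'a/b')
    #   True
    #   >>> m.hasdir(b'a/b/c.py')  # a file, not a directory
    #   False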

    def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
        """Generates matching file names.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        for fn in self._walk(match):
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)
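
    # Illustrative usage sketch (``repo`` and the matcher construction are
    # assumptions, not from the original module): walking the files of a
    # revision's manifest that match a pattern.
    #
    #   >>> m = repo[b'tip'].manifest()
    #   >>> matcher = matchmod.match(repo.root, b'', [b'glob:src/**.py'])
    #   >>> for fn in m.walk(matcher):
    #   ...     print(fn)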

    def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
        '''Recursively generates matching file names for walk().'''
        visit = match.visitchildrenset(self._dir[:-1])
        if not visit:
            return

        # yield this dir's files and walk its submanifests
        self._load()
        visit = self._loadchildrensetlazy(visit)
        for p in sorted(list(self._dirs) + list(self._files)):
            if p in self._files:
                fullp = self._subpath(p)
                if match(fullp):
                    yield fullp
            else:
                if not visit or p[:-1] in visit:
                    for f in self._dirs[p]._walk(match):
                        yield f

    def _matches(self, match: matchmod.basematcher) -> 'treemanifest':
        """recursively generate a new manifest filtered by the match argument."""
        if match.always():
            return self.copy()
        return self._matches_inner(match)

    def _matches_inner(self, match: matchmod.basematcher) -> 'treemanifest':
        if match.always():
            return self.copy()

        visit = match.visitchildrenset(self._dir[:-1])
        if visit == b'all':
            return self.copy()
        ret = treemanifest(self.nodeconstants, self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            # While visitchildrenset *usually* lists only subdirs, this is
            # actually up to the matcher and may have some files in the set().
            # If visit == 'this', we should obviously look at the files in this
            # directory; if visit is a set, and fn is in it, we should inspect
            # fn (but no need to inspect things not in the set).
            if visit != b'this' and fn not in visit:
                continue
            fullp = self._subpath(fn)
            # visitchildrenset isn't perfect, we still need to call the regular
            # matcher code to further filter results.
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        visit = self._loadchildrensetlazy(visit)
        for dir, subm in self._dirs.items():
            if visit and dir[:-1] not in visit:
                continue
            m = subm._matches_inner(match)
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret

    def fastdelta(
        self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
    ) -> ByteString:
        raise FastdeltaUnavailable()

    def diff(
        self,
        m2: 'treemanifest',
        match: Optional[matchmod.basematcher] = None,
        clean: bool = False,
    ) -> Dict[
        bytes,
        Optional[
            Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
        ],
    ]:
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        result = {}
        emptytree = treemanifest(self.nodeconstants)

        def _iterativediff(t1, t2, stack):
            """compares two tree manifests and appends new tree manifests
            which need to be compared to the stack"""
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)

            for d, m1 in t1._dirs.items():
                m2 = t2._dirs.get(d, emptytree)
                stack.append((m1, m2))

            for d, m2 in t2._dirs.items():
                if d not in t1._dirs:
                    stack.append((emptytree, m2))

            for fn, n1 in t1._files.items():
                fl1 = t1._flags.get(fn, b'')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, b'')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in t2._files.items():
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, b'')
                    result[t2._subpath(fn)] = ((None, b''), (n2, fl2))

        stackls = []
        _iterativediff(self, m2, stackls)
        while stackls:
            t1, t2 = stackls.pop()
            # stackls is populated in the function call
            _iterativediff(t1, t2, stackls)
        return result
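
    # Illustrative sketch of the result shape described in the docstring
    # above (node values abbreviated as n1/n2; not from the original module):
    #
    #   >>> m1.diff(m2)
    #   {b'modified.py': ((n1, b''), (n2, b'x')),   # changed node or flags
    #    b'removed.py': ((n1, b''), (None, b'')),   # only in this manifest
    #    b'added.py': ((None, b''), (n2, b''))}     # only in m2
    #   >>> m1.diff(m2, clean=True)[b'unchanged.py'] is None
    #   True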

    def unmodifiedsince(self, m2: 'treemanifest') -> bool:
        return not self._dirty and not m2._dirty and self._node == m2._node

    def parse(
        self,
        text: bytes,
        readsubtree: Callable[[bytes, bytes], 'treemanifest'],
    ) -> None:
        selflazy = self._lazydirs
        for f, n, fl in _parse(self._nodelen, text):
            if fl == b't':
                f = f + b'/'
                # False below means "doesn't need to be copied" and can use the
                # cached value from readsubtree directly.
                selflazy[f] = (n, readsubtree, False)
            elif b'/' in f:
                # This is a flat manifest, so use __setitem__ and setflag rather
                # than assigning directly to _files and _flags, so we can
                # assign a path in a subdirectory, and to mark dirty (compared
                # to nullid).
                self[f] = n
                if fl:
                    self.setflag(f, fl)
            else:
                # Assigning to _files and _flags avoids marking as dirty,
                # and should be a little faster.
                self._files[f] = n
                if fl:
                    self._flags[f] = fl
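
    # Illustrative sketch of the text being parsed (hex nodes abbreviated;
    # not from the original module). Each line is
    # ``path NUL hex-node [flag] NL``; a ``t`` flag marks a subdirectory
    # entry in a tree manifest, and ``parse`` adds the trailing slash:
    #
    #   src\x00a2b3...c4t
    #   README\x001f2e...9d
    #   setup.py\x00d4c5...0ax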

    def text(self) -> ByteString:
        """Get the full data of this manifest as a bytestring."""
        self._load()
        return _text(self.iterentries())

    def dirtext(self) -> ByteString:
        """Get the full data of this directory as a bytestring. Make sure that
        any submanifests have been written first, so their nodeids are correct.
        """
        self._load()
        flags = self.flags
        lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
        dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
        files = [(f, self._files[f], flags(f)) for f in self._files]
        return _text(sorted(dirs + files + lazydirs))

    def read(
        self,
        gettext: Callable[[], ByteString],
        readsubtree: Callable[[bytes, bytes], 'treemanifest'],
    ) -> None:
        def _load_for_read(s):
            s.parse(gettext(), readsubtree)
            s._dirty = False

        self._loadfunc = _load_for_read

    def writesubtrees(
        self,
        m1: 'treemanifest',
        m2: 'treemanifest',
        writesubtree: Callable[
            [
                Callable[['treemanifest'], None],
                bytes,
                bytes,
                matchmod.basematcher,
            ],
            None,
        ],
        match: matchmod.basematcher,
    ) -> None:
        self._load()  # for consistency; should never have any effect here
        m1._load()
        m2._load()
        emptytree = treemanifest(self.nodeconstants)

        def getnode(m, d):
            ld = m._lazydirs.get(d)
            if ld:
                return ld[0]
            tree = m._dirs.get(d, emptytree)
            assert tree is not None  # helps pytype
            return tree._node

        # let's skip investigating things that `match` says we do not need.
        visit = match.visitchildrenset(self._dir[:-1])
        visit = self._loadchildrensetlazy(visit)
        if visit == b'this' or visit == b'all':
            visit = None
        for d, subm in self._dirs.items():
            if visit and d[:-1] not in visit:
                continue
            subp1 = getnode(m1, d)
            subp2 = getnode(m2, d)
            if subp1 == self.nodeconstants.nullid:
                subp1, subp2 = subp2, subp1
            writesubtree(subm, subp1, subp2, match)

    def walksubtrees(
        self, matcher: Optional[matchmod.basematcher] = None
    ) -> Iterator['treemanifest']:
        """Returns an iterator of the subtrees of this manifest, including this
        manifest itself.

        If `matcher` is provided, it only returns subtrees that match.
        """
        if matcher and not matcher.visitdir(self._dir[:-1]):
            return
        if not matcher or matcher(self._dir[:-1]):
            yield self

        self._load()
        # OPT: use visitchildrenset to avoid loading everything.
        self._loadalllazy()
        for d, subm in self._dirs.items():
            for subtree in subm.walksubtrees(matcher=matcher):
                yield subtree
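
    # Illustrative usage sketch (not from the original module): visiting
    # every subtree of a tree manifest, depth-first from the root.
    #
    #   >>> for subtree in m.walksubtrees():
    #   ...     print(subtree.dir())
    #   b''
    #   b'a/'
    #   b'a/b/'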


class manifestfulltextcache(util.lrucachedict):
    """File-backed LRU cache for the manifest cache

    File consists of entries, up to EOF:

    - 20 bytes node, 4 bytes length, <length> manifest data

    These are written in reverse cache order (oldest to newest).

    """

    _file = b'manifestfulltextcache'

    def __init__(self, max):
        super(manifestfulltextcache, self).__init__(max)
        self._dirty = False
        self._read = False
        self._opener = None

    def read(self):
        if self._read or self._opener is None:
            return

        try:
            with self._opener(self._file) as fp:
                set = super(manifestfulltextcache, self).__setitem__
                # ignore trailing data, this is a cache, corruption is skipped
                while True:
                    # TODO do we need to do work here for sha1 portability?
                    node = fp.read(20)
                    if len(node) < 20:
                        break
                    try:
                        size = struct.unpack(b'>L', fp.read(4))[0]
                    except struct.error:
                        break
                    value = bytearray(fp.read(size))
                    if len(value) != size:
                        break
                    set(node, value)
        except IOError:
            # the file is allowed to be missing
            pass

        self._read = True
        self._dirty = False

    def write(self):
        if not self._dirty or self._opener is None:
            return
        # rotate backwards to the first used node
        try:
            with self._opener(
                self._file, b'w', atomictemp=True, checkambig=True
            ) as fp:
                node = self._head.prev
                while True:
                    if node.key in self._cache:
                        fp.write(node.key)
                        fp.write(struct.pack(b'>L', len(node.value)))
                        fp.write(node.value)
                    if node is self._head:
                        break
                    node = node.prev
        except IOError:
            # We could not write the cache (e.g. a permission error);
            # the content can be missing.
            #
            # We could try harder and see if we could recreate a wcache
            # directory where we could write to.
            #
            # XXX the error passes silently; having some way to issue an
            # error log via `ui.log` would be nice.
            pass

    def __len__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__len__()

    def __contains__(self, k):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__contains__(k)

    def __iter__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__iter__()

    def __getitem__(self, k):
        if not self._read:
            self.read()
        # the cache lru order can change on read
        setdirty = self._cache.get(k) is not self._head
        value = super(manifestfulltextcache, self).__getitem__(k)
        if setdirty:
            self._dirty = True
        return value

    def __setitem__(self, k, v):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__setitem__(k, v)
        self._dirty = True

    def __delitem__(self, k):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__delitem__(k)
        self._dirty = True

    def get(self, k, default=None):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).get(k, default=default)

    def clear(self, clear_persisted_data=False):
        super(manifestfulltextcache, self).clear()
        if clear_persisted_data:
            self._dirty = True
            self.write()
        self._read = False


# an upper bound of what we expect from compression
# (real-life value seems to be "3")
MAXCOMPRESSION = 3


class FastdeltaUnavailable(Exception):
    """Exception raised when fastdelta isn't usable on a manifest."""


class manifestrevlog:  # (repository.imanifeststorage)
    """A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.
    """

    def __init__(
        self,
        nodeconstants,
        opener,
        tree=b'',
        dirlogcache=None,
        treemanifest=False,
    ):
        """Constructs a new manifest revlog

        `indexfile` - used by extensions to have two manifests at once, like
        when transitioning between flat manifests and tree manifests.

        `treemanifest` - used to indicate this is a tree manifest revlog. Opener
        options can also be used to make this a tree manifest revlog. The opener
        option takes precedence, so if it is set to True, we ignore whatever
        value is passed in to the constructor.
        """
        self.nodeconstants = nodeconstants
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        optiontreemanifest = False
        persistentnodemap = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get(b'manifestcachesize', cachesize)
            optiontreemanifest = opts.get(b'treemanifest', False)
            persistentnodemap = opts.get(b'persistent-nodemap', False)

        self._treeondisk = optiontreemanifest or treemanifest

        self._fulltextcache = manifestfulltextcache(cachesize)

        if tree:
            assert self._treeondisk, (tree, b'opts is %r' % opts)

        radix = b'00manifest'
        if tree:
            radix = b"meta/" + tree + radix

        self.tree = tree

        # The dirlogcache is kept on the root manifest log
        if tree:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {b'': self}

        self._revlog = revlog.revlog(
            opener,
            target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
            radix=radix,
            # only root indexfile is cached
            checkambig=not bool(tree),
            mmaplargeindex=True,
            upperboundcomp=MAXCOMPRESSION,
            persistentnodemap=persistentnodemap,
        )

        self.index = self._revlog.index

    def get_revlog(self):
        """return an actual revlog instance if any

        This exists because a lot of code leverages the fact that the
        underlying storage is a revlog for optimization, so giving a simple
        way to access the revlog instance helps such code.
        """
        return self._revlog

    def _setupmanifestcachehooks(self, repo):
        """Persist the manifestfulltextcache on lock release"""
        if not hasattr(repo, '_wlockref'):
            return

        self._fulltextcache._opener = repo.wcachevfs
        if repo._currentlock(repo._wlockref) is None:
            return

        reporef = weakref.ref(repo)
        manifestrevlogref = weakref.ref(self)

        def persistmanifestcache(success):
            # Repo is in an unknown state, do not persist.
            if not success:
                return

            repo = reporef()
            self = manifestrevlogref()
            if repo is None or self is None:
                return
            if repo.manifestlog.getstorage(b'') is not self:
                # there's a different manifest in play now, abort
                return
            self._fulltextcache.write()

        repo._afterlock(persistmanifestcache)

    @property
    def fulltextcache(self):
        return self._fulltextcache

    def clearcaches(self, clear_persisted_data: bool = False) -> None:
        self._revlog.clearcaches()
        self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
        self._dirlogcache = {self.tree: self}

    def dirlog(self, d):
        if d:
            assert self._treeondisk
        if d not in self._dirlogcache:
            mfrevlog = manifestrevlog(
                self.nodeconstants,
                self.opener,
                d,
                self._dirlogcache,
                treemanifest=self._treeondisk,
            )
            self._dirlogcache[d] = mfrevlog
        return self._dirlogcache[d]

    def add(
        self,
        m,
        transaction,
        link,
        p1,
        p2,
        added: Iterable[bytes],
        removed: Iterable[bytes],
        readtree=None,
        match=None,
    ):
        """add a manifest entry to the manifest log

        input:

        m: the manifest dict we want to store
        transaction: the open transaction
        p1: manifest-node of p1
        p2: manifest-node of p2
        added: files added/changed compared to parent
        removed: files removed compared to parent

        tree manifest input:

        readtree: a function to read a subtree
        match: a filematcher for the subpart of the tree manifest
        """
        try:
            if p1 not in self.fulltextcache:
                raise FastdeltaUnavailable()
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge(
                [(x, False) for x in sorted(added)],
                [(x, True) for x in sorted(removed)],
            )
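
            # Illustrative sketch (not from the original module): the merged
            # iterator yields (path, removed?) tuples in path order, e.g.
            #
            #   >>> list(heapq.merge([(b'a', False), (b'c', False)],
            #   ...                  [(b'b', True)]))
            #   [(b'a', False), (b'b', True), (b'c', False)]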

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self._revlog.rev(p1), deltatext
            text = util.buffer(arraytext)
            rev = self._revlog.addrevision(
                text, transaction, link, p1, p2, cachedelta
            )
            n = self._revlog.node(rev)
        except FastdeltaUnavailable:
            # The first parent manifest isn't already loaded or the
            # manifest implementation doesn't support fastdelta, so
            # we'll just encode a fulltext of the manifest and pass
            # that through to the revlog layer, and let it handle the
            # delta process.
            if self._treeondisk:
                assert readtree, b"readtree must be set for treemanifest writes"
                assert match, b"match must be specified for treemanifest writes"
                m1 = readtree(self.tree, p1)
                m2 = readtree(self.tree, p2)
                n = self._addtree(
                    m, transaction, link, m1, m2, readtree, match=match
                )
                arraytext = None
            else:
                text = m.text()
                rev = self._revlog.addrevision(text, transaction, link, p1, p2)
                n = self._revlog.node(rev)
                arraytext = bytearray(text)

        if arraytext is not None:
            self.fulltextcache[n] = arraytext

        return n

    def _addtree(self, m, transaction, link, m1, m2, readtree, match):
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        if self.tree != b'' and (
            m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
        ):
            return m.node()

        def writesubtree(subm, subp1, subp2, match):
            sublog = self.dirlog(subm.dir())
            sublog.add(
                subm,
                transaction,
                link,
                subp1,
                subp2,
                None,
                None,
                readtree=readtree,
                match=match,
            )

        m.writesubtrees(m1, m2, writesubtree, match)
        text = m.dirtext()
        n = None
        if self.tree != b'':
            # Double-check whether contents are unchanged compared to one
            # parent
            if text == m1.dirtext():
                n = m1.node()
            elif text == m2.dirtext():
                n = m2.node()

        if not n:
            rev = self._revlog.addrevision(
                text, transaction, link, m1.node(), m2.node()
            )
            n = self._revlog.node(rev)

        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n

    def __len__(self):
        return len(self._revlog)

    def __iter__(self):
        return self._revlog.__iter__()

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, value):
        return self._revlog.lookup(value)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def parents(self, node):
        return self._revlog.parents(node)

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def checksize(self):
        return self._revlog.checksize()

    def revision(self, node):
        return self._revlog.revision(node)

    def rawdata(self, node):
        return self._revlog.rawdata(node)

    def revdiff(self, rev1, rev2):
        return self._revlog.revdiff(rev1, rev2)

    def cmp(self, node, text):
        return self._revlog.cmp(node, text)

    def deltaparent(self, rev):
        return self._revlog.deltaparent(rev)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        return self._revlog.addgroup(
            deltas,
            linkmapper,
            transaction,
            alwayscache=alwayscache,
            addrevisioncb=addrevisioncb,
            duplicaterevisioncb=duplicaterevisioncb,
            debug_info=debug_info,
            delta_base_reuse_policy=delta_base_reuse_policy,
        )

    def rawsize(self, rev):
        return self._revlog.rawsize(rev)

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)

    def files(self):
        return self._revlog.files()

    def clone(self, tr, destrevlog, **kwargs):
        if not isinstance(destrevlog, manifestrevlog):
            raise error.ProgrammingError(b'expected manifestrevlog to clone()')

        return self._revlog.clone(tr, destrevlog._revlog, **kwargs)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        return self._revlog.storageinfo(
            exclusivefiles=exclusivefiles,
            sharedfiles=sharedfiles,
            revisionscount=revisionscount,
            trackedsize=trackedsize,
            storedsize=storedsize,
        )

    @property
    def opener(self):
        return self._revlog.opener

    @opener.setter
    def opener(self, value):
        self._revlog.opener = value


AnyManifestCtx = Union['ManifestCtx', 'TreeManifestCtx']
# TODO: drop this in favor of repository.imanifestdict
AnyManifestDict = Union[manifestdict, treemanifest]


class manifestlog:  # (repository.imanifestlog)
    """A collection class representing the collection of manifest snapshots
2054 """A collection class representing the collection of manifest snapshots
2055 referenced by commits in the repository.
2055 referenced by commits in the repository.
2056
2056
2057 In this situation, 'manifest' refers to the abstract concept of a snapshot
2057 In this situation, 'manifest' refers to the abstract concept of a snapshot
2058 of the list of files in the given commit. Consumers of the output of this
2058 of the list of files in the given commit. Consumers of the output of this
2059 class do not care about the implementation details of the actual manifests
2059 class do not care about the implementation details of the actual manifests
2060 they receive (e.g. tree, flat, or lazily loaded)."""
2060 they receive (e.g. tree, flat, or lazily loaded)."""
2061
2061
2062 def __init__(self, opener, repo, rootstore, narrowmatch):
2062 def __init__(self, opener, repo, rootstore, narrowmatch):
2063 self.nodeconstants = repo.nodeconstants
2063 self.nodeconstants = repo.nodeconstants
2064 usetreemanifest = False
2064 usetreemanifest = False
2065 cachesize = 4
2065 cachesize = 4
2066
2066
2067 opts = getattr(opener, 'options', None)
2067 opts = getattr(opener, 'options', None)
2068 if opts is not None:
2068 if opts is not None:
2069 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
2069 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
2070 cachesize = opts.get(b'manifestcachesize', cachesize)
2070 cachesize = opts.get(b'manifestcachesize', cachesize)
2071
2071
2072 self._treemanifests = usetreemanifest
2072 self._treemanifests = usetreemanifest
2073
2073
2074 self._rootstore = rootstore
2074 self._rootstore = rootstore
2075 self._rootstore._setupmanifestcachehooks(repo)
2075 self._rootstore._setupmanifestcachehooks(repo)
2076 self._narrowmatch = narrowmatch
2076 self._narrowmatch = narrowmatch
2077
2077
2078 # A cache of the manifestctx or treemanifestctx for each directory
2078 # A cache of the manifestctx or treemanifestctx for each directory
2079 self._dirmancache = {}
2079 self._dirmancache = {}
2080 self._dirmancache[b''] = util.lrucachedict(cachesize)
2080 self._dirmancache[b''] = util.lrucachedict(cachesize)
2081
2081
2082 self._cachesize = cachesize
2082 self._cachesize = cachesize
2083
2083
2084 def __getitem__(self, node):
2084 def __getitem__(self, node):
2085 """Retrieves the manifest instance for the given node. Throws a
2085 """Retrieves the manifest instance for the given node. Throws a
2086 LookupError if not found.
2086 LookupError if not found.
2087 """
2087 """
2088 return self.get(b'', node)
2088 return self.get(b'', node)
2089
2089
2090 @property
2090 @property
2091 def narrowed(self):
2091 def narrowed(self):
2092 return not (self._narrowmatch is None or self._narrowmatch.always())
2092 return not (self._narrowmatch is None or self._narrowmatch.always())
2093
2093
2094 def get(
2094 def get(
2095 self, tree: bytes, node: bytes, verify: bool = True
2095 self, tree: bytes, node: bytes, verify: bool = True
2096 ) -> AnyManifestCtx:
2096 ) -> AnyManifestCtx:
2097 """Retrieves the manifest instance for the given node. Throws a
2097 """Retrieves the manifest instance for the given node. Throws a
2098 LookupError if not found.
2098 LookupError if not found.
2099
2099
2100 `verify` - if True, an exception will be thrown if the node is not in
2100 `verify` - if True, an exception will be thrown if the node is not in
2101 the revlog
2101 the revlog
2102 """
2102 """
2103 if node in self._dirmancache.get(tree, ()):
2103 if node in self._dirmancache.get(tree, ()):
2104 return self._dirmancache[tree][node]
2104 return self._dirmancache[tree][node]
2105
2105
2106 if not self._narrowmatch.always():
2106 if not self._narrowmatch.always():
2107 if not self._narrowmatch.visitdir(tree[:-1]):
2107 if not self._narrowmatch.visitdir(tree[:-1]):
2108 return excludeddirmanifestctx(self.nodeconstants, tree, node)
2108 return excludeddirmanifestctx(self.nodeconstants, tree, node)
2109 if tree:
2109 if tree:
2110 if self._rootstore._treeondisk:
2110 if self._rootstore._treeondisk:
2111 if verify:
2111 if verify:
2112 # Side-effect is LookupError is raised if node doesn't
2112 # Side-effect is LookupError is raised if node doesn't
2113 # exist.
2113 # exist.
2114 self.getstorage(tree).rev(node)
2114 self.getstorage(tree).rev(node)
2115
2115
2116 m = treemanifestctx(self, tree, node)
2116 m = treemanifestctx(self, tree, node)
2117 else:
2117 else:
2118 raise error.Abort(
2118 raise error.Abort(
2119 _(
2119 _(
2120 b"cannot ask for manifest directory '%s' in a flat "
2120 b"cannot ask for manifest directory '%s' in a flat "
2121 b"manifest"
2121 b"manifest"
2122 )
2122 )
2123 % tree
2123 % tree
2124 )
2124 )
2125 else:
2125 else:
2126 if verify:
2126 if verify:
2127 # Side-effect is LookupError is raised if node doesn't exist.
2127 # Side-effect is LookupError is raised if node doesn't exist.
2128 self._rootstore.rev(node)
2128 self._rootstore.rev(node)
2129
2129
2130 if self._treemanifests:
2130 if self._treemanifests:
2131 m = treemanifestctx(self, b'', node)
2131 m = treemanifestctx(self, b'', node)
2132 else:
2132 else:
2133 m = manifestctx(self, node)
2133 m = manifestctx(self, node)
2134
2134
2135 if node != self.nodeconstants.nullid:
2135 if node != self.nodeconstants.nullid:
2136 mancache = self._dirmancache.get(tree)
2136 mancache = self._dirmancache.get(tree)
2137 if not mancache:
2137 if not mancache:
2138 mancache = util.lrucachedict(self._cachesize)
2138 mancache = util.lrucachedict(self._cachesize)
2139 self._dirmancache[tree] = mancache
2139 self._dirmancache[tree] = mancache
2140 mancache[node] = m
2140 mancache[node] = m
2141 return m
2141 return m
2142
2142
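As a rough illustration of the tree/flat split in get() above, a minimal sketch; `mlog` and `subnode` are hypothetical names (a manifestlog instance and a known directory manifest node), not defined in this module:

    # Hypothetical sketch only; mlog and subnode are assumptions.
    subctx = mlog.get(b'foo/', subnode)   # TreeManifestCtx on a tree repo
    # On a flat-manifest repository the same call raises error.Abort:
    #   cannot ask for manifest directory 'foo/' in a flat manifest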
2143 def getstorage(self, tree):
2143 def getstorage(self, tree):
2144 return self._rootstore.dirlog(tree)
2144 return self._rootstore.dirlog(tree)
2145
2145
2146 def clearcaches(self, clear_persisted_data: bool = False) -> None:
2146 def clearcaches(self, clear_persisted_data: bool = False) -> None:
2147 self._dirmancache.clear()
2147 self._dirmancache.clear()
2148 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2148 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2149
2149
2150 def rev(self, node) -> int:
2150 def rev(self, node) -> int:
2151 return self._rootstore.rev(node)
2151 return self._rootstore.rev(node)
2152
2152
2153 def update_caches(self, transaction) -> None:
2153 def update_caches(self, transaction) -> None:
2154 return self._rootstore._revlog.update_caches(transaction=transaction)
2154 return self._rootstore._revlog.update_caches(transaction=transaction)
2155
2155
2156
2156
2157 manifestlog = interfaceutil.implementer(repository.imanifestlog)(ManifestLog)
2158
2159 if typing.TYPE_CHECKING:
2160 manifestlog = ManifestLog
2161
2162
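To make the class above concrete, a hedged usage sketch; `repo` and `ctx` are assumed to be an open local repository and a changeset context, neither defined here:

    # Sketch under the assumptions above; not part of this change.
    mlog = repo.manifestlog
    mctx = mlog[ctx.manifestnode()]     # __getitem__ is get(b'', node)
    mdict = mctx.read()                 # manifestdict for that snapshot
    for path, fnode, flags in mdict.iterentries():
        pass                            # every tracked file in the snapshot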
2163 class MemManifestCtx:
2157 class MemManifestCtx:
2164 _manifestdict: manifestdict
2158 _manifestdict: manifestdict
2165
2159
2166 def __init__(self, manifestlog):
2160 def __init__(self, manifestlog):
2167 self._manifestlog = manifestlog
2161 self._manifestlog = manifestlog
2168 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2162 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2169
2163
2170 def _storage(self) -> manifestrevlog:
2164 def _storage(self) -> manifestrevlog:
2171 return self._manifestlog.getstorage(b'')
2165 return self._manifestlog.getstorage(b'')
2172
2166
2173 def copy(self) -> 'MemManifestCtx':
2167 def copy(self) -> 'MemManifestCtx':
2174 memmf = memmanifestctx(self._manifestlog)
2168 memmf = memmanifestctx(self._manifestlog)
2175 memmf._manifestdict = self.read().copy()
2169 memmf._manifestdict = self.read().copy()
2176 return memmf
2170 return memmf
2177
2171
2178 def read(self) -> 'manifestdict':
2172 def read(self) -> 'manifestdict':
2179 return self._manifestdict
2173 return self._manifestdict
2180
2174
2181 def write(self, transaction, link, p1, p2, added, removed, match=None):
2175 def write(self, transaction, link, p1, p2, added, removed, match=None):
2182 return self._storage().add(
2176 return self._storage().add(
2183 self._manifestdict,
2177 self._manifestdict,
2184 transaction,
2178 transaction,
2185 link,
2179 link,
2186 p1,
2180 p1,
2187 p2,
2181 p2,
2188 added,
2182 added,
2189 removed,
2183 removed,
2190 match=match,
2184 match=match,
2191 )
2185 )
2192
2186
2193
2187
2194 memmanifestctx = interfaceutil.implementer(
2188 memmanifestctx = interfaceutil.implementer(
2195 repository.imanifestrevisionwritable
2189 repository.imanifestrevisionwritable
2196 )(MemManifestCtx)
2190 )(MemManifestCtx)
2197
2191
2198 if typing.TYPE_CHECKING:
2192 if typing.TYPE_CHECKING:
2199 memmanifestctx = MemManifestCtx
2193 memmanifestctx = MemManifestCtx
2200
2194
2201
2195
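The implementer()/TYPE_CHECKING pair above recurs for every context class in this file: at runtime the class is wrapped for interface registration, while static type checkers are pointed back at the concrete class. A self-contained sketch of the shape, where `register` is only a placeholder for interfaceutil.implementer(<interface>):

    # Generic sketch of the registration pattern; `register` is a stand-in.
    import typing

    def register(cls):
        return cls                      # analog of interfaceutil.implementer(...)

    class _Concrete:
        pass

    concrete = register(_Concrete)      # runtime name, possibly wrapped
    if typing.TYPE_CHECKING:
        concrete = _Concrete            # checkers resolve to the plain class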
2202 class ManifestCtx:
2196 class ManifestCtx:
2203 """A class representing a single revision of a manifest, including its
2197 """A class representing a single revision of a manifest, including its
2204 contents, its parent revs, and its linkrev.
2198 contents, its parent revs, and its linkrev.
2205 """
2199 """
2206
2200
2207 _data: Optional[manifestdict]
2201 _data: Optional[manifestdict]
2208
2202
2209 def __init__(self, manifestlog, node):
2203 def __init__(self, manifestlog, node):
2210 self._manifestlog = manifestlog
2204 self._manifestlog = manifestlog
2211 self._data = None
2205 self._data = None
2212
2206
2213 self._node = node
2207 self._node = node
2214
2208
2215 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2209 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2216 # but let's add it later when something needs it and we can load it
2210 # but let's add it later when something needs it and we can load it
2217 # lazily.
2211 # lazily.
2218 # self.p1, self.p2 = store.parents(node)
2212 # self.p1, self.p2 = store.parents(node)
2219 # rev = store.rev(node)
2213 # rev = store.rev(node)
2220 # self.linkrev = store.linkrev(rev)
2214 # self.linkrev = store.linkrev(rev)
2221
2215
2222 def _storage(self) -> 'manifestrevlog':
2216 def _storage(self) -> 'manifestrevlog':
2223 return self._manifestlog.getstorage(b'')
2217 return self._manifestlog.getstorage(b'')
2224
2218
2225 def node(self) -> bytes:
2219 def node(self) -> bytes:
2226 return self._node
2220 return self._node
2227
2221
2228 def copy(self) -> MemManifestCtx:
2222 def copy(self) -> MemManifestCtx:
2229 memmf = memmanifestctx(self._manifestlog)
2223 memmf = memmanifestctx(self._manifestlog)
2230 memmf._manifestdict = self.read().copy()
2224 memmf._manifestdict = self.read().copy()
2231 return memmf
2225 return memmf
2232
2226
2233 @propertycache
2227 @propertycache
2234 def parents(self) -> Tuple[bytes, bytes]:
2228 def parents(self) -> Tuple[bytes, bytes]:
2235 return self._storage().parents(self._node)
2229 return self._storage().parents(self._node)
2236
2230
2237 def read(self) -> 'manifestdict':
2231 def read(self) -> 'manifestdict':
2238 if self._data is None:
2232 if self._data is None:
2239 nc = self._manifestlog.nodeconstants
2233 nc = self._manifestlog.nodeconstants
2240 if self._node == nc.nullid:
2234 if self._node == nc.nullid:
2241 self._data = manifestdict(nc.nodelen)
2235 self._data = manifestdict(nc.nodelen)
2242 else:
2236 else:
2243 store = self._storage()
2237 store = self._storage()
2244 if self._node in store.fulltextcache:
2238 if self._node in store.fulltextcache:
2245 text = pycompat.bytestr(store.fulltextcache[self._node])
2239 text = pycompat.bytestr(store.fulltextcache[self._node])
2246 else:
2240 else:
2247 text = store.revision(self._node)
2241 text = store.revision(self._node)
2248 arraytext = bytearray(text)
2242 arraytext = bytearray(text)
2249 store.fulltextcache[self._node] = arraytext
2243 store.fulltextcache[self._node] = arraytext
2250 self._data = manifestdict(nc.nodelen, text)
2244 self._data = manifestdict(nc.nodelen, text)
2251 return self._data
2245 return self._data
2252
2246
2253 def readfast(self, shallow: bool = False) -> 'manifestdict':
2247 def readfast(self, shallow: bool = False) -> 'manifestdict':
2254 """Calls either readdelta or read, based on which would be less work.
2248 """Calls either readdelta or read, based on which would be less work.
2255 readdelta is called if the delta is against the p1, and therefore can be
2249 readdelta is called if the delta is against the p1, and therefore can be
2256 read quickly.
2250 read quickly.
2257
2251
2258 If `shallow` is True, nothing changes since this is a flat manifest.
2252 If `shallow` is True, nothing changes since this is a flat manifest.
2259 """
2253 """
2260 util.nouideprecwarn(
2254 util.nouideprecwarn(
2261 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2255 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2262 b"6.9",
2256 b"6.9",
2263 stacklevel=2,
2257 stacklevel=2,
2264 )
2258 )
2265 store = self._storage()
2259 store = self._storage()
2266 r = store.rev(self._node)
2260 r = store.rev(self._node)
2267 deltaparent = store.deltaparent(r)
2261 deltaparent = store.deltaparent(r)
2268 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2262 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2269 return self.readdelta()
2263 return self.readdelta()
2270 return self.read()
2264 return self.read()
2271
2265
2272 def readdelta(self, shallow: bool = False) -> 'manifestdict':
2266 def readdelta(self, shallow: bool = False) -> 'manifestdict':
2273 """Returns a manifest containing just the entries that are present
2267 """Returns a manifest containing just the entries that are present
2274 in this manifest, but not in its p1 manifest. This is efficient to read
2268 in this manifest, but not in its p1 manifest. This is efficient to read
2275 if the revlog delta is already against p1.
2269 if the revlog delta is already against p1.
2276
2270
2277 Changing the value of `shallow` has no effect on flat manifests.
2271 Changing the value of `shallow` has no effect on flat manifests.
2278 """
2272 """
2279 util.nouideprecwarn(
2273 util.nouideprecwarn(
2280 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2274 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2281 b"6.9",
2275 b"6.9",
2282 stacklevel=2,
2276 stacklevel=2,
2283 )
2277 )
2284 store = self._storage()
2278 store = self._storage()
2285 r = store.rev(self._node)
2279 r = store.rev(self._node)
2286 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2280 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2287 return manifestdict(store.nodeconstants.nodelen, d)
2281 return manifestdict(store.nodeconstants.nodelen, d)
2288
2282
2289 def read_any_fast_delta(
2283 def read_any_fast_delta(
2290 self,
2284 self,
2291 valid_bases: Optional[Collection[int]] = None,
2285 valid_bases: Optional[Collection[int]] = None,
2292 *,
2286 *,
2293 shallow: bool = False,
2287 shallow: bool = False,
2294 ) -> Tuple[Optional[int], manifestdict]:
2288 ) -> Tuple[Optional[int], manifestdict]:
2295 """see `imanifestrevisionstored` documentation"""
2289 """see `imanifestrevisionstored` documentation"""
2296 store = self._storage()
2290 store = self._storage()
2297 r = store.rev(self._node)
2291 r = store.rev(self._node)
2298 deltaparent = store.deltaparent(r)
2292 deltaparent = store.deltaparent(r)
2299 if valid_bases is None:
2293 if valid_bases is None:
2300 # make sure the next check is True
2294 # make sure the next check is True
2301 valid_bases = (deltaparent,)
2295 valid_bases = (deltaparent,)
2302 if deltaparent != nullrev and deltaparent in valid_bases:
2296 if deltaparent != nullrev and deltaparent in valid_bases:
2303 d = mdiff.patchtext(store.revdiff(deltaparent, r))
2297 d = mdiff.patchtext(store.revdiff(deltaparent, r))
2304 return (
2298 return (
2305 deltaparent,
2299 deltaparent,
2306 manifestdict(store.nodeconstants.nodelen, d),
2300 manifestdict(store.nodeconstants.nodelen, d),
2307 )
2301 )
2308 return (None, self.read())
2302 return (None, self.read())
2309
2303
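A caller that already has some bases materialized can pass them as valid_bases; only when the stored delta sits on one of them (and is not against nullrev) is the cheap delta path taken. A hedged sketch, where `mctx`, `p1rev`, and `p2rev` are assumed names:

    # Sketch: mctx is a ManifestCtx; p1rev/p2rev are revs the caller holds.
    base, m = mctx.read_any_fast_delta(valid_bases={p1rev, p2rev})
    if base is None:
        pass    # m is the full manifestdict; no usable delta base
    else:
        pass    # m holds only entries that differ from revision `base`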
2310 def read_delta_parents(
2304 def read_delta_parents(
2311 self,
2305 self,
2312 *,
2306 *,
2313 shallow: bool = False,
2307 shallow: bool = False,
2314 exact: bool = True,
2308 exact: bool = True,
2315 ) -> manifestdict:
2309 ) -> manifestdict:
2316 """see `interface.imanifestrevisionbase` documentations"""
2310 """see `interface.imanifestrevisionbase` documentations"""
2317 store = self._storage()
2311 store = self._storage()
2318 r = store.rev(self._node)
2312 r = store.rev(self._node)
2319 deltaparent = store.deltaparent(r)
2313 deltaparent = store.deltaparent(r)
2320 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2314 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2321 if not exact and deltaparent in parents:
2315 if not exact and deltaparent in parents:
2322 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2316 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2323 return manifestdict(store.nodeconstants.nodelen, d)
2317 return manifestdict(store.nodeconstants.nodelen, d)
2324 elif not exact or len(parents) == 0:
2318 elif not exact or len(parents) == 0:
2325 return self.read()
2319 return self.read()
2326 elif len(parents) == 1:
2320 elif len(parents) == 1:
2327 p = parents[0]
2321 p = parents[0]
2328 d = mdiff.patchtext(store.revdiff(p, r))
2322 d = mdiff.patchtext(store.revdiff(p, r))
2329 return manifestdict(store.nodeconstants.nodelen, d)
2323 return manifestdict(store.nodeconstants.nodelen, d)
2330 else:
2324 else:
2331 p1, p2 = parents
2325 p1, p2 = parents
2332 d1 = mdiff.patchtext(store.revdiff(p1, r))
2326 d1 = mdiff.patchtext(store.revdiff(p1, r))
2333 d2 = mdiff.patchtext(store.revdiff(p2, r))
2327 d2 = mdiff.patchtext(store.revdiff(p2, r))
2334 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2328 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2335 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2329 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2336 md = manifestdict(store.nodeconstants.nodelen)
2330 md = manifestdict(store.nodeconstants.nodelen)
2337 for f, new_node, new_flag in d1.iterentries():
2331 for f, new_node, new_flag in d1.iterentries():
2338 if f not in d2:
2332 if f not in d2:
2339 continue
2333 continue
2340 if new_node is not None:
2334 if new_node is not None:
2341 md.set(f, new_node, new_flag)
2335 md.set(f, new_node, new_flag)
2342 return md
2336 return md
2343
2337
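In the two-parent branch above, d1 holds the entries differing from p1 and d2 those differing from p2; keeping only the files present in both yields the entries new relative to both parents. A toy illustration of that set logic, using plain dicts and made-up nodes:

    # Toy illustration only; the real code iterates manifestdict.iterentries().
    d1 = {b'a': b'n1', b'b': b'n2'}     # differs from p1
    d2 = {b'b': b'n2', b'c': b'n3'}     # differs from p2
    new_vs_both = {f: n for f, n in d1.items() if f in d2}
    assert new_vs_both == {b'b': b'n2'}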
2344 def read_delta_new_entries(self, *, shallow=False) -> manifestdict:
2338 def read_delta_new_entries(self, *, shallow=False) -> manifestdict:
2345 """see `interface.imanifestrevisionbase` documentations"""
2339 """see `interface.imanifestrevisionbase` documentations"""
2346 # If we are using narrow, returning a delta against an arbitrary
2340 # If we are using narrow, returning a delta against an arbitrary
2347 # changeset might return files outside the narrowspec. This can create
2341 # changeset might return files outside the narrowspec. This can create
2348 # issues when running validation server-side with strict security, as a
2342 # issues when running validation server-side with strict security, as a
2349 # push from a low-privilege user might be seen as adding new revisions
2343 # push from a low-privilege user might be seen as adding new revisions
2350 # for files they cannot touch. So we are strict if narrow is involved.
2344 # for files they cannot touch. So we are strict if narrow is involved.
2351 if self._manifestlog.narrowed:
2345 if self._manifestlog.narrowed:
2352 return self.read_delta_parents(shallow=shallow, exact=True)
2346 return self.read_delta_parents(shallow=shallow, exact=True)
2353 store = self._storage()
2347 store = self._storage()
2354 r = store.rev(self._node)
2348 r = store.rev(self._node)
2355 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2349 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2356 return manifestdict(store.nodeconstants.nodelen, d)
2350 return manifestdict(store.nodeconstants.nodelen, d)
2357
2351
2358 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2352 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2359 return self.read().find(key)
2353 return self.read().find(key)
2360
2354
2361
2355
2362 manifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2356 manifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2363 ManifestCtx
2357 ManifestCtx
2364 )
2358 )
2365
2359
2366 if typing.TYPE_CHECKING:
2360 if typing.TYPE_CHECKING:
2367 manifestctx = ManifestCtx
2361 manifestctx = ManifestCtx
2368
2362
2369
2363
2370 class MemTreeManifestCtx:
2364 class MemTreeManifestCtx:
2371 _treemanifest: treemanifest
2365 _treemanifest: treemanifest
2372
2366
2373 def __init__(self, manifestlog, dir=b''):
2367 def __init__(self, manifestlog, dir=b''):
2374 self._manifestlog = manifestlog
2368 self._manifestlog = manifestlog
2375 self._dir = dir
2369 self._dir = dir
2376 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2370 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2377
2371
2378 def _storage(self) -> manifestrevlog:
2372 def _storage(self) -> manifestrevlog:
2379 return self._manifestlog.getstorage(b'')
2373 return self._manifestlog.getstorage(b'')
2380
2374
2381 def copy(self) -> 'MemTreeManifestCtx':
2375 def copy(self) -> 'MemTreeManifestCtx':
2382 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2376 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2383 memmf._treemanifest = self._treemanifest.copy()
2377 memmf._treemanifest = self._treemanifest.copy()
2384 return memmf
2378 return memmf
2385
2379
2386 def read(self) -> 'treemanifest':
2380 def read(self) -> 'treemanifest':
2387 return self._treemanifest
2381 return self._treemanifest
2388
2382
2389 def write(self, transaction, link, p1, p2, added, removed, match=None):
2383 def write(self, transaction, link, p1, p2, added, removed, match=None):
2390 def readtree(dir, node):
2384 def readtree(dir, node):
2391 return self._manifestlog.get(dir, node).read()
2385 return self._manifestlog.get(dir, node).read()
2392
2386
2393 return self._storage().add(
2387 return self._storage().add(
2394 self._treemanifest,
2388 self._treemanifest,
2395 transaction,
2389 transaction,
2396 link,
2390 link,
2397 p1,
2391 p1,
2398 p2,
2392 p2,
2399 added,
2393 added,
2400 removed,
2394 removed,
2401 readtree=readtree,
2395 readtree=readtree,
2402 match=match,
2396 match=match,
2403 )
2397 )
2404
2398
2405
2399
2406 memtreemanifestctx = interfaceutil.implementer(
2400 memtreemanifestctx = interfaceutil.implementer(
2407 repository.imanifestrevisionwritable
2401 repository.imanifestrevisionwritable
2408 )(MemTreeManifestCtx)
2402 )(MemTreeManifestCtx)
2409
2403
2410 if typing.TYPE_CHECKING:
2404 if typing.TYPE_CHECKING:
2411 memtreemanifestctx = MemTreeManifestCtx
2405 memtreemanifestctx = MemTreeManifestCtx
2412
2406
2413
2407
2414 class TreeManifestCtx:
2408 class TreeManifestCtx:
2415 _data: Optional[treemanifest]
2409 _data: Optional[treemanifest]
2416
2410
2417 def __init__(self, manifestlog, dir, node):
2411 def __init__(self, manifestlog, dir, node):
2418 self._manifestlog = manifestlog
2412 self._manifestlog = manifestlog
2419 self._dir = dir
2413 self._dir = dir
2420 self._data = None
2414 self._data = None
2421
2415
2422 self._node = node
2416 self._node = node
2423
2417
2424 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2418 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2425 # we can instantiate treemanifestctx objects for directories we don't
2419 # we can instantiate treemanifestctx objects for directories we don't
2426 # have on disk.
2420 # have on disk.
2427 # self.p1, self.p2 = store.parents(node)
2421 # self.p1, self.p2 = store.parents(node)
2428 # rev = store.rev(node)
2422 # rev = store.rev(node)
2429 # self.linkrev = store.linkrev(rev)
2423 # self.linkrev = store.linkrev(rev)
2430
2424
2431 def _storage(self) -> manifestrevlog:
2425 def _storage(self) -> manifestrevlog:
2432 narrowmatch = self._manifestlog._narrowmatch
2426 narrowmatch = self._manifestlog._narrowmatch
2433 if not narrowmatch.always():
2427 if not narrowmatch.always():
2434 if not narrowmatch.visitdir(self._dir[:-1]):
2428 if not narrowmatch.visitdir(self._dir[:-1]):
2435 return excludedmanifestrevlog(
2429 return excludedmanifestrevlog(
2436 self._manifestlog.nodeconstants, self._dir
2430 self._manifestlog.nodeconstants, self._dir
2437 )
2431 )
2438 return self._manifestlog.getstorage(self._dir)
2432 return self._manifestlog.getstorage(self._dir)
2439
2433
2440 def read(self) -> 'treemanifest':
2434 def read(self) -> 'treemanifest':
2441 if self._data is None:
2435 if self._data is None:
2442 store = self._storage()
2436 store = self._storage()
2443 if self._node == self._manifestlog.nodeconstants.nullid:
2437 if self._node == self._manifestlog.nodeconstants.nullid:
2444 self._data = treemanifest(self._manifestlog.nodeconstants)
2438 self._data = treemanifest(self._manifestlog.nodeconstants)
2445 # TODO accessing non-public API
2439 # TODO accessing non-public API
2446 elif store._treeondisk:
2440 elif store._treeondisk:
2447 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2441 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2448
2442
2449 def gettext():
2443 def gettext():
2450 return store.revision(self._node)
2444 return store.revision(self._node)
2451
2445
2452 def readsubtree(dir, subm):
2446 def readsubtree(dir, subm):
2453 # Set verify to False since we need to be able to create
2447 # Set verify to False since we need to be able to create
2454 # subtrees for trees that don't exist on disk.
2448 # subtrees for trees that don't exist on disk.
2455 return self._manifestlog.get(dir, subm, verify=False).read()
2449 return self._manifestlog.get(dir, subm, verify=False).read()
2456
2450
2457 m.read(gettext, readsubtree)
2451 m.read(gettext, readsubtree)
2458 m.setnode(self._node)
2452 m.setnode(self._node)
2459 self._data = m
2453 self._data = m
2460 else:
2454 else:
2461 if self._node in store.fulltextcache:
2455 if self._node in store.fulltextcache:
2462 text = pycompat.bytestr(store.fulltextcache[self._node])
2456 text = pycompat.bytestr(store.fulltextcache[self._node])
2463 else:
2457 else:
2464 text = store.revision(self._node)
2458 text = store.revision(self._node)
2465 arraytext = bytearray(text)
2459 arraytext = bytearray(text)
2466 store.fulltextcache[self._node] = arraytext
2460 store.fulltextcache[self._node] = arraytext
2467 self._data = treemanifest(
2461 self._data = treemanifest(
2468 self._manifestlog.nodeconstants, dir=self._dir, text=text
2462 self._manifestlog.nodeconstants, dir=self._dir, text=text
2469 )
2463 )
2470
2464
2471 return self._data
2465 return self._data
2472
2466
2473 def node(self) -> bytes:
2467 def node(self) -> bytes:
2474 return self._node
2468 return self._node
2475
2469
2476 def copy(self) -> 'MemTreeManifestCtx':
2470 def copy(self) -> 'MemTreeManifestCtx':
2477 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2471 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2478 memmf._treemanifest = self.read().copy()
2472 memmf._treemanifest = self.read().copy()
2479 return memmf
2473 return memmf
2480
2474
2481 @propertycache
2475 @propertycache
2482 def parents(self) -> Tuple[bytes, bytes]:
2476 def parents(self) -> Tuple[bytes, bytes]:
2483 return self._storage().parents(self._node)
2477 return self._storage().parents(self._node)
2484
2478
2485 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2479 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2486 """see `imanifestrevisionstored` documentation"""
2480 """see `imanifestrevisionstored` documentation"""
2487 util.nouideprecwarn(
2481 util.nouideprecwarn(
2488 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2482 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2489 b"6.9",
2483 b"6.9",
2490 stacklevel=2,
2484 stacklevel=2,
2491 )
2485 )
2492 store = self._storage()
2486 store = self._storage()
2493 if shallow:
2487 if shallow:
2494 r = store.rev(self._node)
2488 r = store.rev(self._node)
2495 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2489 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2496 return manifestdict(store.nodeconstants.nodelen, d)
2490 return manifestdict(store.nodeconstants.nodelen, d)
2497 else:
2491 else:
2498 # Need to perform a slow delta
2492 # Need to perform a slow delta
2499 r0 = store.deltaparent(store.rev(self._node))
2493 r0 = store.deltaparent(store.rev(self._node))
2500 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2494 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2501 m1 = self.read()
2495 m1 = self.read()
2502 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2496 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2503 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2497 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2504 if n1:
2498 if n1:
2505 md[f] = n1
2499 md[f] = n1
2506 if fl1:
2500 if fl1:
2507 md.setflag(f, fl1)
2501 md.setflag(f, fl1)
2508 return md
2502 return md
2509
2503
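The slow-delta branch above consumes manifest diff() output, which maps each differing path to a pair of (node, flags) tuples, with None standing in for a side where the file is absent. A minimal illustration of consuming that shape (the values are made up):

    # path -> ((n0, fl0), (n1, fl1)); an n1 of None means deleted in m1.
    diff = {b'x': ((b'old', b''), (b'new', b'x'))}
    for f, ((n0, fl0), (n1, fl1)) in diff.items():
        if n1:          # present (added or changed) on the m1 side
            pass        # record n1, and fl1 if set, as readdelta does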
2510 def read_any_fast_delta(
2504 def read_any_fast_delta(
2511 self,
2505 self,
2512 valid_bases: Optional[Collection[int]] = None,
2506 valid_bases: Optional[Collection[int]] = None,
2513 *,
2507 *,
2514 shallow: bool = False,
2508 shallow: bool = False,
2515 ) -> Tuple[Optional[int], AnyManifestDict]:
2509 ) -> Tuple[Optional[int], AnyManifestDict]:
2516 """see `imanifestrevisionstored` documentation"""
2510 """see `imanifestrevisionstored` documentation"""
2517 store = self._storage()
2511 store = self._storage()
2518 r = store.rev(self._node)
2512 r = store.rev(self._node)
2519 deltaparent = store.deltaparent(r)
2513 deltaparent = store.deltaparent(r)
2520
2514
2521 if valid_bases is None:
2515 if valid_bases is None:
2522 # make sure the next check is True
2516 # make sure the next check is True
2523 valid_bases = (deltaparent,)
2517 valid_bases = (deltaparent,)
2524 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2518 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2525
2519
2526 if shallow:
2520 if shallow:
2527 if can_use_delta:
2521 if can_use_delta:
2528 return (deltaparent, self._read_storage_delta_shallow())
2522 return (deltaparent, self._read_storage_delta_shallow())
2529 else:
2523 else:
2530 d = store.revision(self._node)
2524 d = store.revision(self._node)
2531 return (None, manifestdict(store.nodeconstants.nodelen, d))
2525 return (None, manifestdict(store.nodeconstants.nodelen, d))
2532 else:
2526 else:
2533 # note: the use of "slow_delta" here is cargo-culted from the previous
2527 # note: the use of "slow_delta" here is cargo-culted from the previous
2534 # implementation. I am not sure it makes sense, since the goal here is
2528 # implementation. I am not sure it makes sense, since the goal here is
2535 # to be fast, so why are we computing a delta? On the other hand, tree
2529 # to be fast, so why are we computing a delta? On the other hand, tree
2536 # manifest deltas are fairly "cheap" and allow for skipping whole parts
2530 # manifest deltas are fairly "cheap" and allow for skipping whole parts
2537 # of the tree that a full read would access. So it might be a good idea.
2531 # of the tree that a full read would access. So it might be a good idea.
2538 #
2532 #
2539 # If we realize we don't need delta here, we should simply use:
2533 # If we realize we don't need delta here, we should simply use:
2540 #
2534 #
2541 # return (None, self.read())
2535 # return (None, self.read())
2542 if can_use_delta:
2536 if can_use_delta:
2543 return (None, self._read_storage_slow_delta(base=deltaparent))
2537 return (None, self._read_storage_slow_delta(base=deltaparent))
2544 else:
2538 else:
2545 parents = [
2539 parents = [
2546 p
2540 p
2547 for p in store.parentrevs(r)
2541 for p in store.parentrevs(r)
2548 if p is not nullrev and p in valid_bases
2542 if p is not nullrev and p in valid_bases
2549 ]
2543 ]
2550 if parents:
2544 if parents:
2551 best_base = max(parents)
2545 best_base = max(parents)
2552 else:
2546 else:
2553 best_base = max(valid_bases)
2547 best_base = max(valid_bases)
2554 return (None, self._read_storage_slow_delta(base=best_base))
2548 return (None, self._read_storage_slow_delta(base=best_base))
2555
2549
2556 def _read_storage_delta_shallow(self) -> manifestdict:
2550 def _read_storage_delta_shallow(self) -> manifestdict:
2557 store = self._storage()
2551 store = self._storage()
2558 r = store.rev(self._node)
2552 r = store.rev(self._node)
2559 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2553 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2560 return manifestdict(store.nodeconstants.nodelen, d)
2554 return manifestdict(store.nodeconstants.nodelen, d)
2561
2555
2562 def _read_storage_slow_delta(self, base) -> 'treemanifest':
2556 def _read_storage_slow_delta(self, base) -> 'treemanifest':
2563 store = self._storage()
2557 store = self._storage()
2564 if base is None:
2558 if base is None:
2565 base = store.deltaparent(store.rev(self._node))
2559 base = store.deltaparent(store.rev(self._node))
2566 m0 = self._manifestlog.get(self._dir, store.node(base)).read()
2560 m0 = self._manifestlog.get(self._dir, store.node(base)).read()
2567 m1 = self.read()
2561 m1 = self.read()
2568 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2562 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2569 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2563 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2570 if n1:
2564 if n1:
2571 md[f] = n1
2565 md[f] = n1
2572 if fl1:
2566 if fl1:
2573 md.setflag(f, fl1)
2567 md.setflag(f, fl1)
2574 return md
2568 return md
2575
2569
2576 def read_delta_parents(
2570 def read_delta_parents(
2577 self,
2571 self,
2578 *,
2572 *,
2579 shallow: bool = False,
2573 shallow: bool = False,
2580 exact: bool = True,
2574 exact: bool = True,
2581 ) -> AnyManifestDict:
2575 ) -> AnyManifestDict:
2582 """see `interface.imanifestrevisionbase` documentations"""
2576 """see `interface.imanifestrevisionbase` documentations"""
2583 store = self._storage()
2577 store = self._storage()
2584 r = store.rev(self._node)
2578 r = store.rev(self._node)
2585 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2579 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2586 if not exact:
2580 if not exact:
2587 return self.read_any_fast_delta(parents, shallow=shallow)[1]
2581 return self.read_any_fast_delta(parents, shallow=shallow)[1]
2588 elif len(parents) == 0:
2582 elif len(parents) == 0:
2589 if shallow:
2583 if shallow:
2590 d = store.revision(self._node)
2584 d = store.revision(self._node)
2591 return manifestdict(store.nodeconstants.nodelen, d)
2585 return manifestdict(store.nodeconstants.nodelen, d)
2592 else:
2586 else:
2593 return self.read()
2587 return self.read()
2594 elif len(parents) == 1:
2588 elif len(parents) == 1:
2595 p = parents[0]
2589 p = parents[0]
2596 if shallow:
2590 if shallow:
2597 d = mdiff.patchtext(store.revdiff(p, r))
2591 d = mdiff.patchtext(store.revdiff(p, r))
2598 return manifestdict(store.nodeconstants.nodelen, d)
2592 return manifestdict(store.nodeconstants.nodelen, d)
2599 else:
2593 else:
2600 return self._read_storage_slow_delta(base=p)
2594 return self._read_storage_slow_delta(base=p)
2601 else:
2595 else:
2602 p1, p2 = parents
2596 p1, p2 = parents
2603 if shallow:
2597 if shallow:
2604 d1 = mdiff.patchtext(store.revdiff(p1, r))
2598 d1 = mdiff.patchtext(store.revdiff(p1, r))
2605 d2 = mdiff.patchtext(store.revdiff(p2, r))
2599 d2 = mdiff.patchtext(store.revdiff(p2, r))
2606 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2600 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2607 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2601 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2608 md = manifestdict(store.nodeconstants.nodelen)
2602 md = manifestdict(store.nodeconstants.nodelen)
2609 for f, new_node, new_flag in d1.iterentries():
2603 for f, new_node, new_flag in d1.iterentries():
2610 if f not in d2:
2604 if f not in d2:
2611 continue
2605 continue
2612 if new_node is not None:
2606 if new_node is not None:
2613 md.set(f, new_node, new_flag)
2607 md.set(f, new_node, new_flag)
2614 return md
2608 return md
2615 else:
2609 else:
2616 m1 = self._manifestlog.get(self._dir, store.node(p1)).read()
2610 m1 = self._manifestlog.get(self._dir, store.node(p1)).read()
2617 m2 = self._manifestlog.get(self._dir, store.node(p2)).read()
2611 m2 = self._manifestlog.get(self._dir, store.node(p2)).read()
2618 mc = self.read()
2612 mc = self.read()
2619 d1 = m1.diff(mc)
2613 d1 = m1.diff(mc)
2620 d2 = m2.diff(mc)
2614 d2 = m2.diff(mc)
2621 md = treemanifest(
2615 md = treemanifest(
2622 self._manifestlog.nodeconstants,
2616 self._manifestlog.nodeconstants,
2623 dir=self._dir,
2617 dir=self._dir,
2624 )
2618 )
2625 for f, new_node, new_flag in d1.iterentries():
2619 for f, new_node, new_flag in d1.iterentries():
2626 if f not in d2:
2620 if f not in d2:
2627 continue
2621 continue
2628 if new_node is not None:
2622 if new_node is not None:
2629 md.set(f, new_node, new_flag)
2623 md.set(f, new_node, new_flag)
2630 return md
2624 return md
2631
2625
2632 def read_delta_new_entries(
2626 def read_delta_new_entries(
2633 self, *, shallow: bool = False
2627 self, *, shallow: bool = False
2634 ) -> AnyManifestDict:
2628 ) -> AnyManifestDict:
2635 """see `interface.imanifestrevisionbase` documentations"""
2629 """see `interface.imanifestrevisionbase` documentations"""
2636 # If we are using narrow, returning a delta against an arbitrary
2630 # If we are using narrow, returning a delta against an arbitrary
2637 # changeset might return files outside the narrowspec. This can create
2631 # changeset might return files outside the narrowspec. This can create
2638 # issues when running validation server-side with strict security, as a
2632 # issues when running validation server-side with strict security, as a
2639 # push from a low-privilege user might be seen as adding new revisions
2633 # push from a low-privilege user might be seen as adding new revisions
2640 # for files they cannot touch. So we are strict if narrow is involved.
2634 # for files they cannot touch. So we are strict if narrow is involved.
2641 if self._manifestlog.narrowed:
2635 if self._manifestlog.narrowed:
2642 return self.read_delta_parents(shallow=shallow, exact=True)
2636 return self.read_delta_parents(shallow=shallow, exact=True)
2643 # delegate to another existing method for simplicity
2637 # delegate to another existing method for simplicity
2644 store = self._storage()
2638 store = self._storage()
2645 r = store.rev(self._node)
2639 r = store.rev(self._node)
2646 bases = (store.deltaparent(r),)
2640 bases = (store.deltaparent(r),)
2647 return self.read_any_fast_delta(bases, shallow=shallow)[1]
2641 return self.read_any_fast_delta(bases, shallow=shallow)[1]
2648
2642
2649 def readfast(self, shallow=False) -> AnyManifestDict:
2643 def readfast(self, shallow=False) -> AnyManifestDict:
2650 """Calls either readdelta or read, based on which would be less work.
2644 """Calls either readdelta or read, based on which would be less work.
2651 readdelta is called if the delta is against the p1, and therefore can be
2645 readdelta is called if the delta is against the p1, and therefore can be
2652 read quickly.
2646 read quickly.
2653
2647
2654 If `shallow` is True, it only returns the entries from this manifest,
2648 If `shallow` is True, it only returns the entries from this manifest,
2655 and not any submanifests.
2649 and not any submanifests.
2656 """
2650 """
2657 util.nouideprecwarn(
2651 util.nouideprecwarn(
2658 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2652 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2659 b"6.9",
2653 b"6.9",
2660 stacklevel=2,
2654 stacklevel=2,
2661 )
2655 )
2662 store = self._storage()
2656 store = self._storage()
2663 r = store.rev(self._node)
2657 r = store.rev(self._node)
2664 deltaparent = store.deltaparent(r)
2658 deltaparent = store.deltaparent(r)
2665 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2659 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2666 return self.readdelta(shallow=shallow)
2660 return self.readdelta(shallow=shallow)
2667
2661
2668 if shallow:
2662 if shallow:
2669 return manifestdict(
2663 return manifestdict(
2670 store.nodeconstants.nodelen, store.revision(self._node)
2664 store.nodeconstants.nodelen, store.revision(self._node)
2671 )
2665 )
2672 else:
2666 else:
2673 return self.read()
2667 return self.read()
2674
2668
2675 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2669 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2676 return self.read().find(key)
2670 return self.read().find(key)
2677
2671
2678
2672
2679 treemanifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2673 treemanifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2680 TreeManifestCtx
2674 TreeManifestCtx
2681 )
2675 )
2682
2676
2683 if typing.TYPE_CHECKING:
2677 if typing.TYPE_CHECKING:
2684 treemanifestctx = TreeManifestCtx
2678 treemanifestctx = TreeManifestCtx
2685
2679
2686
2680
2687 class excludeddir(treemanifest):
2681 class excludeddir(treemanifest):
2688 """Stand-in for a directory that is excluded from the repository.
2682 """Stand-in for a directory that is excluded from the repository.
2689
2683
2690 With narrowing active on a repository that uses treemanifests,
2684 With narrowing active on a repository that uses treemanifests,
2691 some of the directory revlogs will be excluded from the resulting
2685 some of the directory revlogs will be excluded from the resulting
2692 clone. This is a huge storage win for clients, but means we need
2686 clone. This is a huge storage win for clients, but means we need
2693 some sort of pseudo-manifest to surface to internals so we can
2687 some sort of pseudo-manifest to surface to internals so we can
2694 detect a merge conflict outside the narrowspec. That's what this
2688 detect a merge conflict outside the narrowspec. That's what this
2695 class is: it stands in for a directory whose node is known, but
2689 class is: it stands in for a directory whose node is known, but
2696 whose contents are unknown.
2690 whose contents are unknown.
2697 """
2691 """
2698
2692
2699 _files: Dict[bytes, bytes]
2693 _files: Dict[bytes, bytes]
2700 _flags: Dict[bytes, bytes]
2694 _flags: Dict[bytes, bytes]
2701
2695
2702 def __init__(self, nodeconstants, dir, node):
2696 def __init__(self, nodeconstants, dir, node):
2703 super(excludeddir, self).__init__(nodeconstants, dir)
2697 super(excludeddir, self).__init__(nodeconstants, dir)
2704 self._node = node
2698 self._node = node
2705 # Add an empty file, which will be included by iterators and such,
2699 # Add an empty file, which will be included by iterators and such,
2706 # appearing as the directory itself (i.e. something like "dir/")
2700 # appearing as the directory itself (i.e. something like "dir/")
2707 self._files[b''] = node
2701 self._files[b''] = node
2708 self._flags[b''] = b't'
2702 self._flags[b''] = b't'
2709
2703
2710 # Manifests outside the narrowspec should never be modified, so avoid
2704 # Manifests outside the narrowspec should never be modified, so avoid
2711 # copying. This makes a noticeable difference when there are very many
2705 # copying. This makes a noticeable difference when there are very many
2712 # directories outside the narrowspec. Also, it makes sense for the copy to
2706 # directories outside the narrowspec. Also, it makes sense for the copy to
2713 # be of the same type as the original, which would not happen with the
2707 # be of the same type as the original, which would not happen with the
2714 # super type's copy().
2708 # super type's copy().
2715 def copy(self):
2709 def copy(self):
2716 return self
2710 return self
2717
2711
2718
2712
2719 class excludeddirmanifestctx(treemanifestctx):
2713 class excludeddirmanifestctx(treemanifestctx):
2720 """context wrapper for excludeddir - see that docstring for rationale"""
2714 """context wrapper for excludeddir - see that docstring for rationale"""
2721
2715
2722 def __init__(self, nodeconstants, dir, node):
2716 def __init__(self, nodeconstants, dir, node):
2723 self.nodeconstants = nodeconstants
2717 self.nodeconstants = nodeconstants
2724 self._dir = dir
2718 self._dir = dir
2725 self._node = node
2719 self._node = node
2726
2720
2727 def read(self):
2721 def read(self):
2728 return excludeddir(self.nodeconstants, self._dir, self._node)
2722 return excludeddir(self.nodeconstants, self._dir, self._node)
2729
2723
2730 def readfast(self, shallow=False):
2724 def readfast(self, shallow=False):
2731 # special version of readfast since we don't have underlying storage
2725 # special version of readfast since we don't have underlying storage
2732 return self.read()
2726 return self.read()
2733
2727
2734 def write(self, *args):
2728 def write(self, *args):
2735 raise error.ProgrammingError(
2729 raise error.ProgrammingError(
2736 b'attempt to write manifest from excluded dir %s' % self._dir
2730 b'attempt to write manifest from excluded dir %s' % self._dir
2737 )
2731 )
2738
2732
2739
2733
2740 class excludedmanifestrevlog(manifestrevlog):
2734 class excludedmanifestrevlog(manifestrevlog):
2741 """Stand-in for excluded treemanifest revlogs.
2735 """Stand-in for excluded treemanifest revlogs.
2742
2736
2743 When narrowing is active on a treemanifest repository, we'll have
2737 When narrowing is active on a treemanifest repository, we'll have
2744 references to directories we can't see due to the revlog being
2738 references to directories we can't see due to the revlog being
2745 skipped. This class exists to conform to the manifestrevlog
2739 skipped. This class exists to conform to the manifestrevlog
2746 interface for those directories and proactively prevent writes to
2740 interface for those directories and proactively prevent writes to
2747 outside the narrowspec.
2741 outside the narrowspec.
2748 """
2742 """
2749
2743
2750 def __init__(self, nodeconstants, dir):
2744 def __init__(self, nodeconstants, dir):
2751 self.nodeconstants = nodeconstants
2745 self.nodeconstants = nodeconstants
2752 self._dir = dir
2746 self._dir = dir
2753
2747
2754 def __len__(self):
2748 def __len__(self):
2755 raise error.ProgrammingError(
2749 raise error.ProgrammingError(
2756 b'attempt to get length of excluded dir %s' % self._dir
2750 b'attempt to get length of excluded dir %s' % self._dir
2757 )
2751 )
2758
2752
2759 def rev(self, node):
2753 def rev(self, node):
2760 raise error.ProgrammingError(
2754 raise error.ProgrammingError(
2761 b'attempt to get rev from excluded dir %s' % self._dir
2755 b'attempt to get rev from excluded dir %s' % self._dir
2762 )
2756 )
2763
2757
2764 def linkrev(self, node):
2758 def linkrev(self, node):
2765 raise error.ProgrammingError(
2759 raise error.ProgrammingError(
2766 b'attempt to get linkrev from excluded dir %s' % self._dir
2760 b'attempt to get linkrev from excluded dir %s' % self._dir
2767 )
2761 )
2768
2762
2769 def node(self, rev):
2763 def node(self, rev):
2770 raise error.ProgrammingError(
2764 raise error.ProgrammingError(
2771 b'attempt to get node from excluded dir %s' % self._dir
2765 b'attempt to get node from excluded dir %s' % self._dir
2772 )
2766 )
2773
2767
2774 def add(self, *args, **kwargs):
2768 def add(self, *args, **kwargs):
2775 # We should never write entries in dirlogs outside the narrow clone.
2769 # We should never write entries in dirlogs outside the narrow clone.
2776 # However, the method still gets called from writesubtree() in
2770 # However, the method still gets called from writesubtree() in
2777 # _addtree(), so we need to handle it. We should possibly make that code
2771 # _addtree(), so we need to handle it. We should possibly make that code
2778 # avoid calling add() with a clean manifest (_dirty is always False
2772 # avoid calling add() with a clean manifest (_dirty is always False
2779 # in excludeddir instances).
2773 # in excludeddir instances).
2780 pass
2774 pass
@@ -1,281 +1,281
1 # statichttprepo.py - simple http repository class for mercurial
1 # statichttprepo.py - simple http repository class for mercurial
2 #
2 #
3 # This provides read-only repo access to repositories exported via static http
3 # This provides read-only repo access to repositories exported via static http
4 #
4 #
5 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 from __future__ import annotations
10 from __future__ import annotations
11
11
12 import errno
12 import errno
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import sha1nodeconstants
15 from .node import sha1nodeconstants
16 from . import (
16 from . import (
17 branchmap,
17 branchmap,
18 changelog,
18 changelog,
19 error,
19 error,
20 localrepo,
20 localrepo,
21 manifest,
21 manifest,
22 namespaces,
22 namespaces,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 requirements as requirementsmod,
25 requirements as requirementsmod,
26 url,
26 url,
27 util,
27 util,
28 vfs as vfsmod,
28 vfs as vfsmod,
29 )
29 )
30 from .utils import (
30 from .utils import (
31 urlutil,
31 urlutil,
32 )
32 )
33
33
34 urlerr = util.urlerr
34 urlerr = util.urlerr
35 urlreq = util.urlreq
35 urlreq = util.urlreq
36
36
37
37
38 class httprangereader:
38 class httprangereader:
39 def __init__(self, url, opener):
39 def __init__(self, url, opener):
40 # we assume opener has HTTPRangeHandler
40 # we assume opener has HTTPRangeHandler
41 self.url = url
41 self.url = url
42 self.pos = 0
42 self.pos = 0
43 self.opener = opener
43 self.opener = opener
44 self.name = url
44 self.name = url
45
45
46 def __enter__(self):
46 def __enter__(self):
47 return self
47 return self
48
48
49 def __exit__(self, exc_type, exc_value, traceback):
49 def __exit__(self, exc_type, exc_value, traceback):
50 self.close()
50 self.close()
51
51
52 def seek(self, pos):
52 def seek(self, pos):
53 self.pos = pos
53 self.pos = pos
54
54
55 def read(self, n: int = -1):
55 def read(self, n: int = -1):
56 req = urlreq.request(pycompat.strurl(self.url))
56 req = urlreq.request(pycompat.strurl(self.url))
57 end = ''
57 end = ''
58
58
59 if n == 0:
59 if n == 0:
60 return b''
60 return b''
61 elif n > 0:
61 elif n > 0:
62 end = "%d" % (self.pos + n - 1)
62 end = "%d" % (self.pos + n - 1)
63 if self.pos or end:
63 if self.pos or end:
64 req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))
64 req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))
65
65
66 try:
66 try:
67 f = self.opener.open(req)
67 f = self.opener.open(req)
68 data = f.read()
68 data = f.read()
69 code = f.code
69 code = f.code
70 except urlerr.httperror as inst:
70 except urlerr.httperror as inst:
71 num = inst.code == 404 and errno.ENOENT or None
71 num = inst.code == 404 and errno.ENOENT or None
72 # Explicitly convert the exception to str, as Py3 will try to
72 # Explicitly convert the exception to str, as Py3 will try to
73 # convert it to the local encoding and fail, as the HTTPResponse
73 # convert it to the local encoding and fail, as the HTTPResponse
74 # instance doesn't support encode.
74 # instance doesn't support encode.
75             raise IOError(num, str(inst))
76         except urlerr.urlerror as inst:
77             raise IOError(None, inst.reason)
78
79         if code == 200:
80             # HTTPRangeHandler does nothing if remote does not support
81             # Range headers and returns the full entity. Let's slice it.
82             if n > 0 and (self.pos + n) < len(data):
83                 data = data[self.pos : self.pos + n]
84             elif self.pos < len(data):
85                 data = data[self.pos :]
86             else:
87                 data = b''
88         elif 0 < n < len(data):
89             data = data[:n]
90         self.pos += len(data)
91         return data
92
93     def readlines(self):
94         return self.read().splitlines(True)
95
96     def __iter__(self):
97         return iter(self.readlines())
98
99     def close(self):
100         pass
101
102
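For illustration, here is a minimal sketch of the same bounded Range request using only the Python standard library; this mirrors what read(n) above sends on the wire (the URL is hypothetical):

```python
import urllib.request

# Hypothetical URL; read(1024) at pos 0 would send Range: bytes=0-1023.
req = urllib.request.Request('https://example.com/repo/.hg/store/00changelog.i')
req.add_header('Range', 'bytes=0-1023')
with urllib.request.urlopen(req) as f:
    data = f.read()
    # 206 means the server honored the range; a plain 200 means it
    # returned the full entity and the caller must slice it, as above.
    assert f.status in (200, 206)
```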
103 # _RangeError and _HTTPRangeHandler were originally in byterange.py,
104 # which was itself extracted from urlgrabber. See the last version of
105 # byterange.py from history if you need more information.
106 class _RangeError(IOError):
107     """Error raised when an unsatisfiable range is requested."""
108
109
110 class _HTTPRangeHandler(urlreq.basehandler):
111     """Handler that enables HTTP Range headers.
112
113     This was extremely simple. The Range header is an HTTP feature to
114     begin with, so all this class does is tell urllib2 that the
115 "206 Partial Content" response from the HTTP server is what we
115 "206 Partial Content" response from the HTTP server is what we
116 expected.
116 expected.
117 """
117 """
118
118
119 def http_error_206(self, req, fp, code, msg, hdrs):
119 def http_error_206(self, req, fp, code, msg, hdrs):
120 # 206 Partial Content Response
120 # 206 Partial Content Response
121 r = urlreq.addinfourl(fp, hdrs, req.get_full_url())
121 r = urlreq.addinfourl(fp, hdrs, req.get_full_url())
122 r.code = code
122 r.code = code
123 r.msg = msg
123 r.msg = msg
124 return r
124 return r
125
125
126 def http_error_416(self, req, fp, code, msg, hdrs):
126 def http_error_416(self, req, fp, code, msg, hdrs):
127 # HTTP's Range Not Satisfiable error
127 # HTTP's Range Not Satisfiable error
128 raise _RangeError('Requested Range Not Satisfiable')
128 raise _RangeError('Requested Range Not Satisfiable')
129
129
130
130
131 def build_opener(ui, authinfo):
131 def build_opener(ui, authinfo):
132 # urllib cannot handle URLs with embedded user or passwd
132 # urllib cannot handle URLs with embedded user or passwd
133 urlopener = url.opener(ui, authinfo)
133 urlopener = url.opener(ui, authinfo)
134 urlopener.add_handler(_HTTPRangeHandler())
134 urlopener.add_handler(_HTTPRangeHandler())
135
135
136 class statichttpvfs(vfsmod.abstractvfs):
136 class statichttpvfs(vfsmod.abstractvfs):
137 def __init__(self, base):
137 def __init__(self, base):
138 self.base = base
138 self.base = base
139 self.options = {}
139 self.options = {}
140
140
141 def __call__(self, path, mode=b'r', *args, **kw):
141 def __call__(self, path, mode=b'r', *args, **kw):
142 if mode not in (b'r', b'rb'):
142 if mode not in (b'r', b'rb'):
143 raise IOError('Permission denied')
143 raise IOError('Permission denied')
144 f = b"/".join((self.base, urlreq.quote(path)))
144 f = b"/".join((self.base, urlreq.quote(path)))
145 return httprangereader(f, urlopener)
145 return httprangereader(f, urlopener)
146
146
147 def _auditpath(self, path: bytes, mode: bytes) -> None:
147 def _auditpath(self, path: bytes, mode: bytes) -> None:
148 raise NotImplementedError
148 raise NotImplementedError
149
149
150 def join(self, path, *insidef):
150 def join(self, path, *insidef):
151 if path:
151 if path:
152 return pathutil.join(self.base, path, *insidef)
152 return pathutil.join(self.base, path, *insidef)
153 else:
153 else:
154 return self.base
154 return self.base
155
155
156 return statichttpvfs
156 return statichttpvfs
157
157
158
158
159 class statichttppeer(localrepo.localpeer):
159 class statichttppeer(localrepo.localpeer):
160 def local(self):
160 def local(self):
161 return None
161 return None
162
162
163 def canpush(self):
163 def canpush(self):
164 return False
164 return False
165
165
166
166
167 class statichttprepository(
167 class statichttprepository(
168 localrepo.localrepository, localrepo.revlogfilestorage
168 localrepo.localrepository, localrepo.revlogfilestorage
169 ):
169 ):
170 supported = localrepo.localrepository._basesupported
170 supported = localrepo.localrepository._basesupported
171
171
172     manifestlog: manifest.ManifestLog
172     manifestlog: manifest.manifestlog
173
174     def __init__(self, ui, path):
175         self._url = path
176         self.ui = ui
177
178         self.root = path
179         u = urlutil.url(path.rstrip(b'/') + b"/.hg")
180         self.path, authinfo = u.authinfo()
181
182         vfsclass = build_opener(ui, authinfo)
183         self.vfs = vfsclass(self.path)
184         self.cachevfs = vfsclass(self.vfs.join(b'cache'))
185         self._phasedefaults = []
186
187         self.names = namespaces.namespaces()
188         self.filtername = None
189         self._extrafilterid = None
190         self._wanted_sidedata = set()
191         self.features = set()
192
193         try:
194             requirements = set(self.vfs.read(b'requires').splitlines())
195         except FileNotFoundError:
196             requirements = set()
197
198         # check if it is a non-empty old-style repository
199         try:
200             with self.vfs(b"00changelog.i") as fp:
201                 fp.read(1)
202         except FileNotFoundError:
203             # we do not care about empty old-style repositories here
204             msg = _(b"'%s' does not appear to be an hg repository") % path
205             raise error.RepoError(msg)
206         if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
207             storevfs = vfsclass(self.vfs.join(b'store'))
208             requirements |= set(storevfs.read(b'requires').splitlines())
209
210         supportedrequirements = localrepo.gathersupportedrequirements(ui)
211         localrepo.ensurerequirementsrecognized(
212             requirements, supportedrequirements
213         )
214         localrepo.ensurerequirementscompatible(ui, requirements)
215         self.nodeconstants = sha1nodeconstants
216         self.nullid = self.nodeconstants.nullid
217
218         # setup store
219         self.store = localrepo.makestore(requirements, self.path, vfsclass)
220         self.spath = self.store.path
221         self.svfs = self.store.opener
222         self.sjoin = self.store.join
223         self._filecache = {}
224         self.requirements = requirements
225
226         rootmanifest = manifest.manifestrevlog(self.nodeconstants, self.svfs)
227         self.manifestlog = manifest.manifestlog(
228             self.svfs, self, rootmanifest, self.narrowmatch()
229         )
230         self.changelog = changelog.changelog(self.svfs)
231         self._tags = None
232         self.nodetagscache = None
233         self._branchcaches = branchmap.BranchMapCache()
234         self._revbranchcache = None
235         self.encodepats = None
236         self.decodepats = None
237         self._transref = None
238         self._dirstate = None
239
240     def _restrictcapabilities(self, caps):
241         caps = super(statichttprepository, self)._restrictcapabilities(caps)
242         return caps.difference([b"pushkey"])
243
244     def url(self):
245         return self._url
246
247     def local(self):
248         return False
249
250     def peer(self, path=None, remotehidden=False):
251         return statichttppeer(self, path=path, remotehidden=remotehidden)
252
253     def wlock(self, wait=True):
254         raise error.LockUnavailable(
255             0,
256             pycompat.sysstr(_(b'lock not available')),
257             b'lock',
258             _(b'cannot lock static-http repository'),
259         )
260
261     def lock(self, wait=True):
262         raise error.LockUnavailable(
263             0,
264             pycompat.sysstr(_(b'lock not available')),
265             b'lock',
266             _(b'cannot lock static-http repository'),
267         )
268
269     def _writecaches(self):
270         pass # statichttprepository is read-only
271
272
273 def make_peer(
274     ui, path, create, intents=None, createopts=None, remotehidden=False
275 ):
276     if create:
277         raise error.Abort(_(b'cannot create new static-http repository'))
278     url = path.loc[7:]
279     return statichttprepository(ui, url).peer(
280         path=path, remotehidden=remotehidden
281     )
@@ -1,1258 +1,1258
1 # store.py - repository store handling for Mercurial
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import annotations
9
10 import collections
11 import functools
12 import os
13 import re
14 import stat
15 import typing
16
17 from typing import (
18     Generator,
19     List,
20     Optional,
21 )
22
23 from .i18n import _
24 from .thirdparty import attr
25
26 # Force pytype to use the non-vendored package
27 if typing.TYPE_CHECKING:
28     # noinspection PyPackageRequirements
29     import attr
30
31 from .node import hex
32 from .revlogutils.constants import (
33     INDEX_HEADER,
34     KIND_CHANGELOG,
35     KIND_FILELOG,
36     KIND_MANIFESTLOG,
37 )
38 from . import (
39     changelog,
40     error,
41     filelog,
42     manifest,
43     policy,
44     pycompat,
45     revlog as revlogmod,
46     util,
47     vfs as vfsmod,
48 )
49 from .utils import hashutil
50
51 parsers = policy.importmod('parsers')
52 # how many bytes should be read from fncache in one read
53 # This is done to prevent loading large fncache files into memory
54 fncache_chunksize = 10**6
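The constant is meant for the standard chunked-read loop, sketched here (the file name and consumer are hypothetical):

```python
with open('fncache', 'rb') as fp:
    while True:
        chunk = fp.read(fncache_chunksize)  # at most ~1 MB at a time
        if not chunk:
            break
        process(chunk)  # hypothetical consumer
```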
55
56
57 def _match_tracked_entry(entry: "BaseStoreEntry", matcher):
58     """parses a fncache entry and returns whether the entry is tracking a path
59     matched by matcher or not.
60
61     If matcher is None, returns True"""
62
63     if matcher is None:
64         return True
65
66     # TODO: make this safe for other entry types. Currently, the various
67     # store.data_entry generators only yield RevlogStoreEntry, so the
68     # attributes do exist on `entry`.
69     # pytype: disable=attribute-error
70     if entry.is_filelog:
71         return matcher(entry.target_id)
72     elif entry.is_manifestlog:
73         return matcher.visitdir(entry.target_id.rstrip(b'/'))
74     # pytype: enable=attribute-error
75     raise error.ProgrammingError(b"cannot process entry %r" % entry)
76
77
78 # This avoids a collision between a file named foo and a dir named
79 # foo.i or foo.d
80 def _encodedir(path):
81     """
82     >>> _encodedir(b'data/foo.i')
83     'data/foo.i'
84     >>> _encodedir(b'data/foo.i/bla.i')
85     'data/foo.i.hg/bla.i'
86     >>> _encodedir(b'data/foo.i.hg/bla.i')
87     'data/foo.i.hg.hg/bla.i'
88     >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
89     'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
90     """
91     return (
92         path.replace(b".hg/", b".hg.hg/")
93         .replace(b".i/", b".i.hg/")
94         .replace(b".d/", b".d.hg/")
95     )
96
97
98 encodedir = getattr(parsers, 'encodedir', _encodedir)
99
100
101 def decodedir(path):
102     """
103     >>> decodedir(b'data/foo.i')
104     'data/foo.i'
105     >>> decodedir(b'data/foo.i.hg/bla.i')
106     'data/foo.i/bla.i'
107     >>> decodedir(b'data/foo.i.hg.hg/bla.i')
108     'data/foo.i.hg/bla.i'
109     """
110     if b".hg/" not in path:
111         return path
112     return (
113         path.replace(b".d.hg/", b".d/")
114         .replace(b".i.hg/", b".i/")
115         .replace(b".hg.hg/", b".hg/")
116     )
117
118
119 def _reserved():
120     """characters that are problematic for filesystems
121
122     * ascii escapes (0..31)
123     * ascii hi (126..255)
124     * windows specials
125
126     these characters will be escaped by encodefunctions
127     """
128     winreserved = [ord(x) for x in u'\\:*?"<>|']
129     for x in range(32):
130         yield x
131     for x in range(126, 256):
132         yield x
133     for x in winreserved:
134         yield x
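A quick, illustrative check of the three categories listed in the docstring (not part of the module):

```python
r = set(_reserved())
assert 0 in r and 31 in r                # ascii escapes (0..31)
assert 126 in r and 255 in r             # ascii hi (126..255)
assert ord('?') in r and ord('|') in r   # windows specials
```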
135
136
137 def _buildencodefun():
138     """
139     >>> enc, dec = _buildencodefun()
140
141     >>> enc(b'nothing/special.txt')
142     'nothing/special.txt'
143     >>> dec(b'nothing/special.txt')
144     'nothing/special.txt'
145
146     >>> enc(b'HELLO')
147     '_h_e_l_l_o'
148     >>> dec(b'_h_e_l_l_o')
149     'HELLO'
150
151     >>> enc(b'hello:world?')
152     'hello~3aworld~3f'
153     >>> dec(b'hello~3aworld~3f')
154     'hello:world?'
155
156     >>> enc(b'the\\x07quick\\xADshot')
157     'the~07quick~adshot'
158     >>> dec(b'the~07quick~adshot')
159     'the\\x07quick\\xadshot'
160     """
161     e = b'_'
162     xchr = pycompat.bytechr
163     asciistr = list(map(xchr, range(127)))
164     capitals = list(range(ord(b"A"), ord(b"Z") + 1))
165
166     cmap = {x: x for x in asciistr}
167     for x in _reserved():
168         cmap[xchr(x)] = b"~%02x" % x
169     for x in capitals + [ord(e)]:
170         cmap[xchr(x)] = e + xchr(x).lower()
171
172     dmap = {}
173     for k, v in cmap.items():
174         dmap[v] = k
175
176     def decode(s):
177         i = 0
178         while i < len(s):
179             for l in range(1, 4):
180                 try:
181                     yield dmap[s[i : i + l]]
182                     i += l
183                     break
184                 except KeyError:
185                     pass
186             else:
187                 raise KeyError
188
189     return (
190         lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
191         lambda s: b''.join(list(decode(s))),
192     )
193
194
195 _encodefname, _decodefname = _buildencodefun()
196
197
198 def encodefilename(s):
199     """
200     >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
201     'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
202     """
203     return _encodefname(encodedir(s))
204
205
206 def decodefilename(s):
207     """
208     >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
209     'foo.i/bar.d/bla.hg/hi:world?/HELLO'
210     """
211     return decodedir(_decodefname(s))
212
213
214 def _buildlowerencodefun():
215     """
216     >>> f = _buildlowerencodefun()
217     >>> f(b'nothing/special.txt')
218     'nothing/special.txt'
219     >>> f(b'HELLO')
220     'hello'
221     >>> f(b'hello:world?')
222     'hello~3aworld~3f'
223     >>> f(b'the\\x07quick\\xADshot')
224     'the~07quick~adshot'
225     """
226     xchr = pycompat.bytechr
227     cmap = {xchr(x): xchr(x) for x in range(127)}
228     for x in _reserved():
229         cmap[xchr(x)] = b"~%02x" % x
230     for x in range(ord(b"A"), ord(b"Z") + 1):
231         cmap[xchr(x)] = xchr(x).lower()
232
233     def lowerencode(s):
234         return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
235
236     return lowerencode
237
238
239 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
240
241 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
242 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
243 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
244
245
246 def _auxencode(path, dotencode):
247     """
248     Encodes filenames containing names reserved by Windows or which end in
249     period or space. Does not touch other single reserved characters c.
250     Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
251     Additionally encodes space or period at the beginning, if dotencode is
252     True. Parameter path is assumed to be all lowercase.
253     A segment only needs encoding if a reserved name appears as a
254     basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
255     doesn't need encoding.
256
257     >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
258     >>> _auxencode(s.split(b'/'), True)
259     ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
260     >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
261     >>> _auxencode(s.split(b'/'), False)
262     ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
263     >>> _auxencode([b'foo. '], True)
264     ['foo.~20']
265     >>> _auxencode([b' .foo'], True)
266     ['~20.foo']
267     """
268     for i, n in enumerate(path):
269         if not n:
270             continue
271         if dotencode and n[0] in b'. ':
272             n = b"~%02x" % ord(n[0:1]) + n[1:]
273             path[i] = n
274         else:
275             l = n.find(b'.')
276             if l == -1:
277                 l = len(n)
278             if (l == 3 and n[:3] in _winres3) or (
279                 l == 4
280                 and n[3:4] <= b'9'
281                 and n[3:4] >= b'1'
282                 and n[:3] in _winres4
283             ):
284                 # encode third letter ('aux' -> 'au~78')
285                 ec = b"~%02x" % ord(n[2:3])
286                 n = n[0:2] + ec + n[3:]
287                 path[i] = n
288         if n[-1] in b'. ':
289             # encode last period or space ('foo...' -> 'foo..~2e')
290             path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
291     return path
292
293
294 _maxstorepathlen = 120
295 _dirprefixlen = 8
296 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
297
298
299 def _hashencode(path, dotencode):
300     digest = hex(hashutil.sha1(path).digest())
301     le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
302     parts = _auxencode(le, dotencode)
303     basename = parts[-1]
304     _root, ext = os.path.splitext(basename)
305     sdirs = []
306     sdirslen = 0
307     for p in parts[:-1]:
308         d = p[:_dirprefixlen]
309         if d[-1] in b'. ':
310             # Windows can't access dirs ending in period or space
311             d = d[:-1] + b'_'
312         if sdirslen == 0:
313             t = len(d)
314         else:
315             t = sdirslen + 1 + len(d)
316         if t > _maxshortdirslen:
317             break
318         sdirs.append(d)
319         sdirslen = t
320     dirs = b'/'.join(sdirs)
321     if len(dirs) > 0:
322         dirs += b'/'
323     res = b'dh/' + dirs + digest + ext
324     spaceleft = _maxstorepathlen - len(res)
325     if spaceleft > 0:
326         filler = basename[:spaceleft]
327         res = b'dh/' + dirs + filler + digest + ext
328     return res
329
330
331 def _hybridencode(path, dotencode):
332     """encodes path with a length limit
333
334     Encodes all paths that begin with 'data/', according to the following.
335
336     Default encoding (reversible):
337
338     Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
339     characters are encoded as '~xx', where xx is the two digit hex code
340     of the character (see encodefilename).
341     Relevant path components consisting of Windows reserved filenames are
342     masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
343
344     Hashed encoding (not reversible):
345
346     If the default-encoded path is longer than _maxstorepathlen, a
347     non-reversible hybrid hashing of the path is done instead.
348     This encoding uses up to _dirprefixlen characters of all directory
349     levels of the lowerencoded path, but not more levels than can fit into
350     _maxshortdirslen.
351     Then follows the filler followed by the sha digest of the full path.
352     The filler is the beginning of the basename of the lowerencoded path
353     (the basename is everything after the last path separator). The filler
354     is as long as possible, filling in characters from the basename until
355     the encoded path has _maxstorepathlen characters (or all chars of the
356     basename have been taken).
357     The extension (e.g. '.i' or '.d') is preserved.
358
359     The string 'data/' at the beginning is replaced with 'dh/', if the hashed
360     encoding was used.
361     """
362     path = encodedir(path)
363     ef = _encodefname(path).split(b'/')
364     res = b'/'.join(_auxencode(ef, dotencode))
365     if len(res) > _maxstorepathlen:
366         res = _hashencode(path, dotencode)
367     return res
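As a sketch of the length guarantee described in the docstring (assuming the module is importable as mercurial.store), a path whose default encoding would exceed _maxstorepathlen falls back to the hashed form:

```python
from mercurial.store import _hybridencode, _maxstorepathlen

# 200 uppercase letters default-encode to 400 chars ('X' -> '_x'),
# well past the 120-character limit, so the hashed form is used.
long_path = b'data/' + b'X' * 200 + b'.i'
res = _hybridencode(long_path, True)
assert res.startswith(b'dh/')          # 'data/' replaced by 'dh/'
assert len(res) <= _maxstorepathlen    # hashed paths stay short
```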
368
369
370 def _pathencode(path):
371     de = encodedir(path)
372     if len(path) > _maxstorepathlen:
373         return _hashencode(de, True)
374     ef = _encodefname(de).split(b'/')
375     res = b'/'.join(_auxencode(ef, True))
376     if len(res) > _maxstorepathlen:
377         return _hashencode(de, True)
378     return res
379
380
381 _pathencode = getattr(parsers, 'pathencode', _pathencode)
382
383
384 def _plainhybridencode(f):
385     return _hybridencode(f, False)
386
387
388 def _calcmode(vfs):
389     try:
390         # files in .hg/ will be created using this mode
391         mode = vfs.stat().st_mode
392         # avoid some useless chmods
393         if (0o777 & ~util.umask) == (0o777 & mode):
394             mode = None
395     except OSError:
396         mode = None
397     return mode
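For example, with the common umask of 022 a fresh .hg directory is created 0o755, the equality above holds, and mode stays None so redundant chmod() calls are skipped (illustrative arithmetic only):

```python
umask = 0o022          # a typical umask
st_mode_perm = 0o755   # permission bits of a freshly created directory
assert (0o777 & ~umask) == (0o777 & st_mode_perm)
```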
398
399
400 _data = [
401     b'bookmarks',
402     b'narrowspec',
403     b'data',
404     b'meta',
405     b'00manifest.d',
406     b'00manifest.i',
407     b'00changelog.d',
408     b'00changelog.i',
409     b'phaseroots',
410     b'obsstore',
411     b'requires',
412 ]
413
414 REVLOG_FILES_EXT = (
415     b'.i',
416     b'.idx',
417     b'.d',
418     b'.dat',
419     b'.n',
420     b'.nd',
421     b'.sda',
422 )
423 # file extensions that also use a `-SOMELONGIDHASH.ext` form
424 REVLOG_FILES_LONG_EXT = (
425     b'.nd',
426     b'.idx',
427     b'.dat',
428     b'.sda',
429 )
430 # files that are "volatile" and might change between listing and streaming
431 #
432 # note: the ".nd" files are nodemap data and won't "change", but they might be
433 # deleted.
434 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
435
436 # some exceptions to the above matching
437 #
438 # XXX This is currently not in use because of issue6542
439 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
440
441
442 def is_revlog(f, kind, st):
443     if kind != stat.S_IFREG:
444         return False
445     if f.endswith(REVLOG_FILES_EXT):
446         return True
447     return False
448
449
450 def is_revlog_file(f):
451     if f.endswith(REVLOG_FILES_EXT):
452         return True
453     return False
454
455
456 @attr.s(slots=True)
457 class StoreFile:
458     """a file matching a store entry"""
459
460     unencoded_path = attr.ib()
461     _file_size = attr.ib(default=None)
462     is_volatile = attr.ib(default=False)
463
464     def file_size(self, vfs):
465         if self._file_size is None:
466             if vfs is None:
467                 msg = b"calling vfs-less file_size without prior call: %s"
468                 msg %= self.unencoded_path
469                 raise error.ProgrammingError(msg)
470             try:
471                 self._file_size = vfs.stat(self.unencoded_path).st_size
472             except FileNotFoundError:
473                 self._file_size = 0
474         return self._file_size
475
476     @property
477     def has_size(self):
478         return self._file_size is not None
479
480     def get_stream(self, vfs, volatiles):
481         """return data "stream" information for this file
482
483         (unencoded_file_path, content_iterator, content_size)
484         """
485         size = self.file_size(None)
486
487         def get_stream():
488             path = vfs.join(self.unencoded_path)
489             with volatiles.open(path) as fp:
490                 yield None # ready to stream
491                 if size <= 65536:
492                     yield fp.read(size)
493                 else:
494                     yield from util.filechunkiter(fp, limit=size)
495
496         s = get_stream()
497         next(s)
498         return (self.unencoded_path, s, size)
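A note on the next(s) call above: advancing the generator to its first yield None opens the file eagerly, so open errors surface immediately, while the file content itself is still streamed lazily. A hypothetical consumer would look like:

```python
# names are hypothetical, for illustration only
name, stream, size = store_file.get_stream(vfs, volatiles)
for chunk in stream:            # yields the file content in chunks
    send_to_client(name, chunk)
```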
499
500
501 @attr.s(slots=True, init=False)
502 class BaseStoreEntry:
503     """An entry in the store
504
505     This is returned by `store.walk` and represents some data in the store."""
506
507     maybe_volatile = True
508
509     def files(self) -> List[StoreFile]:
510         raise NotImplementedError
511
512     def get_streams(
513         self,
514         repo=None,
515         vfs=None,
516         volatiles=None,
517         max_changeset=None,
518         preserve_file_count=False,
519     ):
520 """return a list of data stream associated to files for this entry
520 """return a list of data stream associated to files for this entry
521
522         return [(unencoded_file_path, content_iterator, content_size), …]
523         """
524         assert vfs is not None
525         return [f.get_stream(vfs, volatiles) for f in self.files()]
526
527
528 @attr.s(slots=True, init=False)
529 class SimpleStoreEntry(BaseStoreEntry):
530     """A generic entry in the store"""
531
532     is_revlog = False
533
534     maybe_volatile = attr.ib()
535     _entry_path = attr.ib()
536     _is_volatile = attr.ib(default=False)
537     _file_size = attr.ib(default=None)
538     _files = attr.ib(default=None)
539
540     def __init__(
541         self,
542         entry_path,
543         is_volatile=False,
544         file_size=None,
545     ):
546         super().__init__()
547         self._entry_path = entry_path
548         self._is_volatile = is_volatile
549         self._file_size = file_size
550         self._files = None
551         self.maybe_volatile = is_volatile
552
553     def files(self) -> List[StoreFile]:
554         if self._files is None:
555             self._files = [
556                 StoreFile(
557                     unencoded_path=self._entry_path,
558                     file_size=self._file_size,
559                     is_volatile=self._is_volatile,
560                 )
561             ]
562         return self._files
563
564
565 @attr.s(slots=True, init=False)
566 class RevlogStoreEntry(BaseStoreEntry):
567     """A revlog entry in the store"""
568
569     is_revlog = True
570
571     revlog_type = attr.ib(default=None)
572     target_id = attr.ib(default=None)
573     maybe_volatile = attr.ib(default=True)
574     _path_prefix = attr.ib(default=None)
575     _details = attr.ib(default=None)
576     _files = attr.ib(default=None)
577
578     def __init__(
579         self,
580         revlog_type,
581         path_prefix,
582         target_id,
583         details,
584     ):
585         super().__init__()
586         self.revlog_type = revlog_type
587         self.target_id = target_id
588         self._path_prefix = path_prefix
589         assert b'.i' in details, (path_prefix, details)
590         for ext in details:
591             if ext.endswith(REVLOG_FILES_VOLATILE_EXT):
592                 self.maybe_volatile = True
593                 break
594         else:
595             self.maybe_volatile = False
596         self._details = details
597         self._files = None
598
599     @property
600     def is_changelog(self):
601         return self.revlog_type == KIND_CHANGELOG
602
603     @property
604     def is_manifestlog(self):
605         return self.revlog_type == KIND_MANIFESTLOG
606
607     @property
608     def is_filelog(self):
609         return self.revlog_type == KIND_FILELOG
610
611     def main_file_path(self):
612         """unencoded path of the main revlog file"""
613         return self._path_prefix + b'.i'
614
615     def files(self) -> List[StoreFile]:
616         if self._files is None:
617             self._files = []
618             for ext in sorted(self._details, key=_ext_key):
619                 path = self._path_prefix + ext
620                 file_size = self._details[ext]
621                 # files that are "volatile" and might change between
622                 # listing and streaming
623                 #
624                 # note: the ".nd" files are nodemap data and won't "change"
625                 # but they might be deleted.
626                 volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
627                 f = StoreFile(path, file_size, volatile)
628                 self._files.append(f)
629         return self._files
630
631     def get_streams(
632         self,
633         repo=None,
634         vfs=None,
635         volatiles=None,
636         max_changeset=None,
637         preserve_file_count=False,
638     ):
639         pre_sized = all(f.has_size for f in self.files())
640         if pre_sized and (
641             repo is None
642             or max_changeset is None
643             # This uses revlog-v2; ignore it for now
644             or any(k.endswith(b'.idx') for k in self._details.keys())
645             # This is not inline, no race expected
646             or b'.d' in self._details
647         ):
648             return super().get_streams(
649                 repo=repo,
650                 vfs=vfs,
651                 volatiles=volatiles,
652                 max_changeset=max_changeset,
653                 preserve_file_count=preserve_file_count,
654             )
655         elif not preserve_file_count:
656             stream = [
657                 f.get_stream(vfs, volatiles)
658                 for f in self.files()
659                 if not f.unencoded_path.endswith((b'.i', b'.d'))
660             ]
661             rl = self.get_revlog_instance(repo).get_revlog()
662             rl_stream = rl.get_streams(max_changeset)
663             stream.extend(rl_stream)
664             return stream
665
666         name_to_size = {}
667         for f in self.files():
668             name_to_size[f.unencoded_path] = f.file_size(None)
669
670         stream = [
671             f.get_stream(vfs, volatiles)
672             for f in self.files()
673             if not f.unencoded_path.endswith(b'.i')
674         ]
675
676         index_path = self._path_prefix + b'.i'
677
678         index_file = None
679         try:
680             index_file = vfs(index_path)
681             header = index_file.read(INDEX_HEADER.size)
682             if revlogmod.revlog.is_inline_index(header):
683                 size = name_to_size[index_path]
684
685                 # no split underneath, just return the stream
686                 def get_stream():
687                     fp = index_file
688                     try:
689                         fp.seek(0)
690                         yield None
691                         if size <= 65536:
692                             yield fp.read(size)
693                         else:
694                             yield from util.filechunkiter(fp, limit=size)
695                     finally:
696                         fp.close()
697
698                 s = get_stream()
699                 next(s)
700                 index_file = None
701                 stream.append((index_path, s, size))
702             else:
703                 rl = self.get_revlog_instance(repo).get_revlog()
704                 rl_stream = rl.get_streams(max_changeset, force_inline=True)
705                 for name, s, size in rl_stream:
706                     if name_to_size.get(name, 0) != size:
707                         msg = _(b"expected %d bytes but %d provided for %s")
708                         msg %= name_to_size.get(name, 0), size, name
709                         raise error.Abort(msg)
710                 stream.extend(rl_stream)
711         finally:
712             if index_file is not None:
713                 index_file.close()
714
715         files = self.files()
716         assert len(stream) == len(files), (
717             stream,
718             files,
719             self._path_prefix,
720             self.target_id,
721         )
722         return stream
723
724     def get_revlog_instance(self, repo):
725         """Obtain a revlog instance from this store entry
726
727         An instance of the appropriate class is returned.
728         """
729         if self.is_changelog:
730             return changelog.changelog(repo.svfs)
731         elif self.is_manifestlog:
732             mandir = self.target_id
733             return manifest.manifestrevlog(
734                 repo.nodeconstants, repo.svfs, tree=mandir
735             )
736         else:
737             return filelog.filelog(repo.svfs, self.target_id)
738
739
740 def _gather_revlog(files_data):
741     """group files per revlog prefix
742
743     This returns a two-level nested dict. The top-level key is the revlog
744     prefix without extension; the second level maps each file "suffix" seen
745     for this revlog to arbitrary file data.
746 """
746 """
747 revlogs = collections.defaultdict(dict)
747 revlogs = collections.defaultdict(dict)
748 for u, value in files_data:
748 for u, value in files_data:
749 name, ext = _split_revlog_ext(u)
749 name, ext = _split_revlog_ext(u)
750 revlogs[name][ext] = value
750 revlogs[name][ext] = value
751 return sorted(revlogs.items())
751 return sorted(revlogs.items())
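A small illustrative run (file names and sizes are made up):

```python
files = [(b'data/foo.i', 64), (b'data/foo.d', 2048)]
print(_gather_revlog(files))
# -> [(b'data/foo', {b'.i': 64, b'.d': 2048})]
```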
752
753
754 def _split_revlog_ext(filename):
755     """split the revlog file prefix from the variable extension"""
756     if filename.endswith(REVLOG_FILES_LONG_EXT):
757         char = b'-'
758     else:
759         char = b'.'
760     idx = filename.rfind(char)
761     return filename[:idx], filename[idx:]
762
763
764 def _ext_key(ext):
765 """a key to order revlog suffix
765 """a key to order revlog suffix
766
766
767 important to issue .i after other entry."""
767 important to issue .i after other entry."""
768     # the only important part of this order is to keep the `.i` last.
769     if ext.endswith(b'.n'):
770         return (0, ext)
771     elif ext.endswith(b'.nd'):
772         return (10, ext)
773     elif ext.endswith(b'.d'):
774         return (20, ext)
775     elif ext.endswith(b'.i'):
776         return (50, ext)
777     else:
778         return (40, ext)
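For instance, sorting the common suffixes with this key yields the intended order, with .i last:

```python
print(sorted([b'.i', b'.d', b'.nd', b'.n'], key=_ext_key))
# -> [b'.n', b'.nd', b'.d', b'.i']
```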
779
780
781 class basicstore:
782     '''base class for local repository stores'''
783
784     def __init__(self, path, vfstype):
785         vfs = vfstype(path)
786         self.path = vfs.base
787         self.createmode = _calcmode(vfs)
788         vfs.createmode = self.createmode
789         self.rawvfs = vfs
790         self.vfs = vfsmod.filtervfs(vfs, encodedir)
791         self.opener = self.vfs
792
793     def join(self, f):
794         return self.path + b'/' + encodedir(f)
795
796     def _walk(self, relpath, recurse, undecodable=None):
797         '''yields (revlog_type, unencoded, size)'''
798         path = self.path
799         if relpath:
800             path += b'/' + relpath
801         striplen = len(self.path) + 1
802         l = []
803         if self.rawvfs.isdir(path):
804             visit = [path]
805             readdir = self.rawvfs.readdir
806             while visit:
807                 p = visit.pop()
808                 for f, kind, st in readdir(p, stat=True):
809                     fp = p + b'/' + f
810                     if is_revlog(f, kind, st):
811                         n = util.pconvert(fp[striplen:])
812                         l.append((decodedir(n), st.st_size))
813                     elif kind == stat.S_IFDIR and recurse:
814                         visit.append(fp)
815
816         l.sort()
817         return l
818
819     def changelog(self, trypending, concurrencychecker=None):
820         return changelog.changelog(
821             self.vfs,
822             trypending=trypending,
823             concurrencychecker=concurrencychecker,
824         )
825
826     def manifestlog(self, repo, storenarrowmatch) -> manifest.ManifestLog:
826     def manifestlog(self, repo, storenarrowmatch) -> manifest.manifestlog:
827         rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
828         return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
829
830     def data_entries(
831         self, matcher=None, undecodable=None
832     ) -> Generator[BaseStoreEntry, None, None]:
833         """Like walk, but excluding the changelog and root manifest.
834
835         When [undecodable] is None, revlog names that can't be
836         decoded cause an exception. When it is provided, it should
837         be a list, and the filenames that can't be decoded are added
838         to it instead. This is very rarely needed."""
839 dirs = [
839 dirs = [
840 (b'data', KIND_FILELOG, False),
840 (b'data', KIND_FILELOG, False),
841 (b'meta', KIND_MANIFESTLOG, True),
841 (b'meta', KIND_MANIFESTLOG, True),
842 ]
842 ]
843 for base_dir, rl_type, strip_filename in dirs:
843 for base_dir, rl_type, strip_filename in dirs:
844 files = self._walk(base_dir, True, undecodable=undecodable)
844 files = self._walk(base_dir, True, undecodable=undecodable)
845 for revlog, details in _gather_revlog(files):
845 for revlog, details in _gather_revlog(files):
846 revlog_target_id = revlog.split(b'/', 1)[1]
846 revlog_target_id = revlog.split(b'/', 1)[1]
847 if strip_filename and b'/' in revlog:
847 if strip_filename and b'/' in revlog:
848 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
848 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
849 revlog_target_id += b'/'
849 revlog_target_id += b'/'
850 yield RevlogStoreEntry(
850 yield RevlogStoreEntry(
851 path_prefix=revlog,
851 path_prefix=revlog,
852 revlog_type=rl_type,
852 revlog_type=rl_type,
853 target_id=revlog_target_id,
853 target_id=revlog_target_id,
854 details=details,
854 details=details,
855 )
855 )
856
856
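    # Illustrative note: for filelogs the target id is the tracked path,
    # while for tree-manifest revlogs under `meta/` the trailing `00manifest`
    # file name is stripped, so the target id is the directory (with a
    # trailing slash). Hypothetical examples:
    #
    #     b'data/foo.txt'        -> target_id b'foo.txt'
    #     b'meta/sub/00manifest' -> target_id b'sub/'
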
    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, s in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = s
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = s
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=False,
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, KIND_MANIFESTLOG),
            (changelogs, KIND_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=details,
                )

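    # Illustrative note: `_split_revlog_ext` (defined earlier in this module)
    # roughly separates b'00changelog.d' into (b'00changelog', b'.d'), so the
    # loop above groups every extension of the same revlog into one `details`
    # dict. Manifest entries are deliberately yielded before the changelog: a
    # consumer copying entries in order then handles the changelog last,
    # after the data it references.
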
    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (i.e. revlogs)

        yields instances of BaseStoreEntry subclasses

        if a matcher is passed, only storage files for tracked paths
        matching it are yielded
        """
        # yield data files first
        for x in self.data_entries(matcher):
            yield x
        for x in self.top_entries(phase=phase, obsolescence=obsolescence):
            yield x

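    # Illustrative usage sketch (hedged: the `files()` / `unencoded_path` /
    # `file_size()` names follow the BaseStoreEntry API used elsewhere in
    # this module, not anything defined here):
    #
    #     for entry in repo.store.walk(phase=True, obsolescence=True):
    #         for f in entry.files():
    #             print(f.unencoded_path, f.file_size(repo.store.vfs))
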
    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)


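# Illustrative usage sketch: membership checks are expressed in tracked-path
# terms, not raw store paths. Assuming b'foo/bar.txt' is tracked:
#
#     b'foo/bar.txt' in store   # True via the b'data/foo/bar.txt.i' revlog
#     b'foo' in store           # True via the b'data/foo/' directory prefix

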
class encodedstore(basicstore):
    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

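    # Illustrative usage sketch: callers that want to survive corrupt names
    # can collect them instead of aborting (hypothetical variable names):
    #
    #     bad = []
    #     entries = store._walk(b'data', True, undecodable=bad)
    #     # `bad` now holds on-disk names that decodefilename() rejected.
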
    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        entries = super(encodedstore, self).data_entries(
            undecodable=undecodable
        )
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]


class fncache:
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            msg = _(b"fncache does not end with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

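    # Illustrative note: the loop above reads the fncache in fixed-size
    # chunks (fncache_chunksize, defined earlier in this module) and only
    # splits on the last newline seen so far, carrying the partial tail over
    # to the next iteration. A non-empty `chunk` after the loop therefore
    # means the file did not end with a newline, which is reported as
    # corruption.
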
    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()

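    # Illustrative note: `write` has two paths. When entries were removed
    # (`_dirty`), the whole file is rewritten atomically; when there are only
    # new additions (`addls`), they are appended instead, which is much
    # cheaper for large repositories. After an append, `entries` is reset to
    # None so the next reader reloads the merged state from disk.
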
    def addignore(self, fn):
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)


class _fncachevfs(vfsmod.proxyvfs):
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and is_revlog_file(path)
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and (
                # if the file has size zero, it should be considered as missing.
                # Such zero-size files are the result of truncation when a
                # transaction is aborted.
                self.vfs.exists(encoded)
                and self.vfs.stat(encoded).st_size
            )
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

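    # Illustrative note: the `notload` dance above avoids loading the whole
    # fncache just to re-register a revlog file that demonstrably already
    # exists on disk. A zero-size file does not count as existing because
    # transaction aborts truncate files to zero bytes rather than unlinking
    # them.
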
    def join(self, path: Optional[bytes], *insidef: bytes) -> bytes:
        insidef = (self.encode(f) for f in insidef)

        if path:
            return self.vfs.join(self.encode(path), *insidef)
        else:
            return self.vfs.join(path, *insidef)

    def register_file(self, path):
        """generic hook point to let fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)


class fncachestore(basicstore):
    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # Note: all files in fncache should be revlog related; however, the
        # fncache might contain non-revlog entries added by previous versions
        # of Mercurial.
        files = ((f, None) for f in self.fncache if is_revlog_file(f))
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = KIND_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = KIND_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

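    # Illustrative note: unlike basicstore, this variant never walks the
    # filesystem; the fncache is the authoritative list of revlog files. The
    # b'data/' vs b'meta/' prefix decides whether an entry is a filelog or a
    # tree-manifest revlog, mirroring the target-id derivation in
    # basicstore.data_entries.
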
    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
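
# Illustrative usage sketch: the extra `_exists` check guards against stale
# fncache entries whose backing file is gone. Assuming b'foo/bar.txt' is
# tracked:
#
#     b'foo/bar.txt' in store   # fncache hit for b'data/foo/bar.txt.i'
#     b'foo' in store           # prefix scan for b'data/foo/' entries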