revlog: move the details of revlog "v2" index inside revlog.utils.constants...
marmoute -
r47617:85e3a630 default
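
This changeset stops hard-coding the experimental revlog-v2 index-entry format in parsers.py and revlog.py and instead reads it from revlogutils/constants.py, next to the INDEX_ENTRY_V0 and INDEX_ENTRY_V1 structs already imported there. The constants module itself is not part of the hunks below; the following is a minimal sketch of what the new entry presumably looks like, reconstructed from the format string and the 96-byte assertion that the diff removes:

import struct

# hypothetical reconstruction, assuming the same struct.Struct style as V0/V1
INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
assert INDEX_ENTRY_V2.size == 96  # matches the assert dropped from Index2Mixin

# consumers then use the shared object instead of a local format string:
#   index_format = INDEX_ENTRY_V2.format   (parsers.Index2Mixin)
#   p = INDEX_ENTRY_V2.pack(*entry)        (revlog.revlogv2io.packentry)
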
@@ -1,365 +1,352 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from ..node import nullid, nullrev
13 from ..node import nullid, nullrev
14 from .. import (
14 from .. import (
15 pycompat,
15 pycompat,
16 util,
16 util,
17 )
17 )
18
18
19 from ..revlogutils import nodemap as nodemaputil
19 from ..revlogutils import nodemap as nodemaputil
20 from ..revlogutils import constants as revlog_constants
20 from ..revlogutils import constants as revlog_constants
21
21
22 stringio = pycompat.bytesio
22 stringio = pycompat.bytesio
23
23
24
24
25 _pack = struct.pack
25 _pack = struct.pack
26 _unpack = struct.unpack
26 _unpack = struct.unpack
27 _compress = zlib.compress
27 _compress = zlib.compress
28 _decompress = zlib.decompress
28 _decompress = zlib.decompress
29
29
30 # Some code below makes tuples directly because it's more convenient. However,
30 # Some code below makes tuples directly because it's more convenient. However,
31 # code outside this module should always use dirstatetuple.
31 # code outside this module should always use dirstatetuple.
32 def dirstatetuple(*x):
32 def dirstatetuple(*x):
33 # x is a tuple
33 # x is a tuple
34 return x
34 return x
35
35
36
36
37 def gettype(q):
37 def gettype(q):
38 return int(q & 0xFFFF)
38 return int(q & 0xFFFF)
39
39
40
40
41 def offset_type(offset, type):
41 def offset_type(offset, type):
42 return int(int(offset) << 16 | type)
42 return int(int(offset) << 16 | type)
43
43
44
44
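# Editor's illustration (not part of the diff): the first field of every index
# entry packs the data-file offset and the 16-bit flag field into one integer,
# offset in the high bits, flags in the low bits:
#
#   q = offset_type(12345, 1)    # == (12345 << 16) | 1
#   gettype(q)                   # -> 1, the flag bits
#   q >> 16                      # -> 12345, the offset (getoffset() in revlog.py)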
45 class BaseIndexObject(object):
45 class BaseIndexObject(object):
46 # Format of an index entry according to Python's `struct` language
46 # Format of an index entry according to Python's `struct` language
47 index_format = revlog_constants.INDEX_ENTRY_V1.format
47 index_format = revlog_constants.INDEX_ENTRY_V1.format
48 # Size of a C unsigned long long int, platform independent
48 # Size of a C unsigned long long int, platform independent
49 big_int_size = struct.calcsize(b'>Q')
49 big_int_size = struct.calcsize(b'>Q')
50 # Size of a C long int, platform independent
50 # Size of a C long int, platform independent
51 int_size = struct.calcsize(b'>i')
51 int_size = struct.calcsize(b'>i')
52 # Size of the entire index format
52 # Size of the entire index format
53 index_size = revlog_constants.INDEX_ENTRY_V1.size
53 index_size = revlog_constants.INDEX_ENTRY_V1.size
54 # An empty index entry, used as a default value to be overridden, or nullrev
54 # An empty index entry, used as a default value to be overridden, or nullrev
55 null_item = (0, 0, 0, -1, -1, -1, -1, nullid)
55 null_item = (0, 0, 0, -1, -1, -1, -1, nullid)
56
56
57 @property
57 @property
58 def nodemap(self):
58 def nodemap(self):
59 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
59 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
60 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
60 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
61 return self._nodemap
61 return self._nodemap
62
62
63 @util.propertycache
63 @util.propertycache
64 def _nodemap(self):
64 def _nodemap(self):
65 nodemap = nodemaputil.NodeMap({nullid: nullrev})
65 nodemap = nodemaputil.NodeMap({nullid: nullrev})
66 for r in range(0, len(self)):
66 for r in range(0, len(self)):
67 n = self[r][7]
67 n = self[r][7]
68 nodemap[n] = r
68 nodemap[n] = r
69 return nodemap
69 return nodemap
70
70
71 def has_node(self, node):
71 def has_node(self, node):
72 """return True if the node exist in the index"""
72 """return True if the node exist in the index"""
73 return node in self._nodemap
73 return node in self._nodemap
74
74
75 def rev(self, node):
75 def rev(self, node):
76 """return a revision for a node
76 """return a revision for a node
77
77
78 If the node is unknown, raise a RevlogError"""
78 If the node is unknown, raise a RevlogError"""
79 return self._nodemap[node]
79 return self._nodemap[node]
80
80
81 def get_rev(self, node):
81 def get_rev(self, node):
82 """return a revision for a node
82 """return a revision for a node
83
83
84 If the node is unknown, return None"""
84 If the node is unknown, return None"""
85 return self._nodemap.get(node)
85 return self._nodemap.get(node)
86
86
87 def _stripnodes(self, start):
87 def _stripnodes(self, start):
88 if '_nodemap' in vars(self):
88 if '_nodemap' in vars(self):
89 for r in range(start, len(self)):
89 for r in range(start, len(self)):
90 n = self[r][7]
90 n = self[r][7]
91 del self._nodemap[n]
91 del self._nodemap[n]
92
92
93 def clearcaches(self):
93 def clearcaches(self):
94 self.__dict__.pop('_nodemap', None)
94 self.__dict__.pop('_nodemap', None)
95
95
96 def __len__(self):
96 def __len__(self):
97 return self._lgt + len(self._extra)
97 return self._lgt + len(self._extra)
98
98
99 def append(self, tup):
99 def append(self, tup):
100 if '_nodemap' in vars(self):
100 if '_nodemap' in vars(self):
101 self._nodemap[tup[7]] = len(self)
101 self._nodemap[tup[7]] = len(self)
102 data = _pack(self.index_format, *tup)
102 data = _pack(self.index_format, *tup)
103 self._extra.append(data)
103 self._extra.append(data)
104
104
105 def _check_index(self, i):
105 def _check_index(self, i):
106 if not isinstance(i, int):
106 if not isinstance(i, int):
107 raise TypeError(b"expecting int indexes")
107 raise TypeError(b"expecting int indexes")
108 if i < 0 or i >= len(self):
108 if i < 0 or i >= len(self):
109 raise IndexError
109 raise IndexError
110
110
111 def __getitem__(self, i):
111 def __getitem__(self, i):
112 if i == -1:
112 if i == -1:
113 return self.null_item
113 return self.null_item
114 self._check_index(i)
114 self._check_index(i)
115 if i >= self._lgt:
115 if i >= self._lgt:
116 data = self._extra[i - self._lgt]
116 data = self._extra[i - self._lgt]
117 else:
117 else:
118 index = self._calculate_index(i)
118 index = self._calculate_index(i)
119 data = self._data[index : index + self.index_size]
119 data = self._data[index : index + self.index_size]
120 r = _unpack(self.index_format, data)
120 r = _unpack(self.index_format, data)
121 if self._lgt and i == 0:
121 if self._lgt and i == 0:
122 r = (offset_type(0, gettype(r[0])),) + r[1:]
122 r = (offset_type(0, gettype(r[0])),) + r[1:]
123 return r
123 return r
124
124
125
125
126 class IndexObject(BaseIndexObject):
126 class IndexObject(BaseIndexObject):
127 def __init__(self, data):
127 def __init__(self, data):
128 assert len(data) % self.index_size == 0
128 assert len(data) % self.index_size == 0
129 self._data = data
129 self._data = data
130 self._lgt = len(data) // self.index_size
130 self._lgt = len(data) // self.index_size
131 self._extra = []
131 self._extra = []
132
132
133 def _calculate_index(self, i):
133 def _calculate_index(self, i):
134 return i * self.index_size
134 return i * self.index_size
135
135
136 def __delitem__(self, i):
136 def __delitem__(self, i):
137 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
137 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
138 raise ValueError(b"deleting slices only supports a:-1 with step 1")
138 raise ValueError(b"deleting slices only supports a:-1 with step 1")
139 i = i.start
139 i = i.start
140 self._check_index(i)
140 self._check_index(i)
141 self._stripnodes(i)
141 self._stripnodes(i)
142 if i < self._lgt:
142 if i < self._lgt:
143 self._data = self._data[: i * self.index_size]
143 self._data = self._data[: i * self.index_size]
144 self._lgt = i
144 self._lgt = i
145 self._extra = []
145 self._extra = []
146 else:
146 else:
147 self._extra = self._extra[: i - self._lgt]
147 self._extra = self._extra[: i - self._lgt]
148
148
149
149
150 class PersistentNodeMapIndexObject(IndexObject):
150 class PersistentNodeMapIndexObject(IndexObject):
151 """a Debug oriented class to test persistent nodemap
151 """a Debug oriented class to test persistent nodemap
152
152
153 We need a simple python object to test API and higher level behavior. See
153 We need a simple python object to test API and higher level behavior. See
154 the Rust implementation for more serious usage. This should be used only
154 the Rust implementation for more serious usage. This should be used only
155 through the dedicated `devel.persistent-nodemap` config.
155 through the dedicated `devel.persistent-nodemap` config.
156 """
156 """
157
157
158 def nodemap_data_all(self):
158 def nodemap_data_all(self):
159 """Return bytes containing a full serialization of a nodemap
159 """Return bytes containing a full serialization of a nodemap
160
160
161 The nodemap should be valid for the full set of revisions in the
161 The nodemap should be valid for the full set of revisions in the
162 index."""
162 index."""
163 return nodemaputil.persistent_data(self)
163 return nodemaputil.persistent_data(self)
164
164
165 def nodemap_data_incremental(self):
165 def nodemap_data_incremental(self):
166 """Return bytes containing a incremental update to persistent nodemap
166 """Return bytes containing a incremental update to persistent nodemap
167
167
168 This contains the data for an append-only update of the data provided
168 This contains the data for an append-only update of the data provided
169 in the last call to `update_nodemap_data`.
169 in the last call to `update_nodemap_data`.
170 """
170 """
171 if self._nm_root is None:
171 if self._nm_root is None:
172 return None
172 return None
173 docket = self._nm_docket
173 docket = self._nm_docket
174 changed, data = nodemaputil.update_persistent_data(
174 changed, data = nodemaputil.update_persistent_data(
175 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
175 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
176 )
176 )
177
177
178 self._nm_root = self._nm_max_idx = self._nm_docket = None
178 self._nm_root = self._nm_max_idx = self._nm_docket = None
179 return docket, changed, data
179 return docket, changed, data
180
180
181 def update_nodemap_data(self, docket, nm_data):
181 def update_nodemap_data(self, docket, nm_data):
182 """provide full block of persisted binary data for a nodemap
182 """provide full block of persisted binary data for a nodemap
183
183
184 The data are expected to come from disk. See `nodemap_data_all` for a
184 The data are expected to come from disk. See `nodemap_data_all` for a
185 producer of such data."""
185 producer of such data."""
186 if nm_data is not None:
186 if nm_data is not None:
187 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
187 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
188 if self._nm_root:
188 if self._nm_root:
189 self._nm_docket = docket
189 self._nm_docket = docket
190 else:
190 else:
191 self._nm_root = self._nm_max_idx = self._nm_docket = None
191 self._nm_root = self._nm_max_idx = self._nm_docket = None
192
192
193
193
194 class InlinedIndexObject(BaseIndexObject):
194 class InlinedIndexObject(BaseIndexObject):
195 def __init__(self, data, inline=0):
195 def __init__(self, data, inline=0):
196 self._data = data
196 self._data = data
197 self._lgt = self._inline_scan(None)
197 self._lgt = self._inline_scan(None)
198 self._inline_scan(self._lgt)
198 self._inline_scan(self._lgt)
199 self._extra = []
199 self._extra = []
200
200
201 def _inline_scan(self, lgt):
201 def _inline_scan(self, lgt):
202 off = 0
202 off = 0
203 if lgt is not None:
203 if lgt is not None:
204 self._offsets = [0] * lgt
204 self._offsets = [0] * lgt
205 count = 0
205 count = 0
206 while off <= len(self._data) - self.index_size:
206 while off <= len(self._data) - self.index_size:
207 start = off + self.big_int_size
207 start = off + self.big_int_size
208 (s,) = struct.unpack(
208 (s,) = struct.unpack(
209 b'>i',
209 b'>i',
210 self._data[start : start + self.int_size],
210 self._data[start : start + self.int_size],
211 )
211 )
212 if lgt is not None:
212 if lgt is not None:
213 self._offsets[count] = off
213 self._offsets[count] = off
214 count += 1
214 count += 1
215 off += self.index_size + s
215 off += self.index_size + s
216 if off != len(self._data):
216 if off != len(self._data):
217 raise ValueError(b"corrupted data")
217 raise ValueError(b"corrupted data")
218 return count
218 return count
219
219
220 def __delitem__(self, i):
220 def __delitem__(self, i):
221 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
221 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
222 raise ValueError(b"deleting slices only supports a:-1 with step 1")
222 raise ValueError(b"deleting slices only supports a:-1 with step 1")
223 i = i.start
223 i = i.start
224 self._check_index(i)
224 self._check_index(i)
225 self._stripnodes(i)
225 self._stripnodes(i)
226 if i < self._lgt:
226 if i < self._lgt:
227 self._offsets = self._offsets[:i]
227 self._offsets = self._offsets[:i]
228 self._lgt = i
228 self._lgt = i
229 self._extra = []
229 self._extra = []
230 else:
230 else:
231 self._extra = self._extra[: i - self._lgt]
231 self._extra = self._extra[: i - self._lgt]
232
232
233 def _calculate_index(self, i):
233 def _calculate_index(self, i):
234 return self._offsets[i]
234 return self._offsets[i]
235
235
236
236
237 def parse_index2(data, inline, revlogv2=False):
237 def parse_index2(data, inline, revlogv2=False):
238 if not inline:
238 if not inline:
239 cls = IndexObject2 if revlogv2 else IndexObject
239 cls = IndexObject2 if revlogv2 else IndexObject
240 return cls(data), None
240 return cls(data), None
241 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
241 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
242 return cls(data, inline), (0, data)
242 return cls(data, inline), (0, data)
243
243
244
244
245 class Index2Mixin(object):
245 class Index2Mixin(object):
246 # 6 bytes: offset
246 index_format = revlog_constants.INDEX_ENTRY_V2.format
247 # 2 bytes: flags
247 index_size = revlog_constants.INDEX_ENTRY_V2.size
248 # 4 bytes: compressed length
249 # 4 bytes: uncompressed length
250 # 4 bytes: base rev
251 # 4 bytes: link rev
252 # 4 bytes: parent 1 rev
253 # 4 bytes: parent 2 rev
254 # 32 bytes: nodeid
255 # 8 bytes: sidedata offset
256 # 4 bytes: sidedata compressed length
257 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
258 index_format = b">Qiiiiii20s12xQi20x"
259 index_size = struct.calcsize(index_format)
260 assert index_size == 96, index_size
261 null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
248 null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
262
249
263 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
250 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
264 """
251 """
265 Replace an existing index entry's sidedata offset and length with new
252 Replace an existing index entry's sidedata offset and length with new
266 ones.
253 ones.
267 This cannot be used outside of the context of sidedata rewriting,
254 This cannot be used outside of the context of sidedata rewriting,
268 inside the transaction that creates the revision `i`.
255 inside the transaction that creates the revision `i`.
269 """
256 """
270 if i < 0:
257 if i < 0:
271 raise KeyError
258 raise KeyError
272 self._check_index(i)
259 self._check_index(i)
273 sidedata_format = b">Qi"
260 sidedata_format = b">Qi"
274 packed_size = struct.calcsize(sidedata_format)
261 packed_size = struct.calcsize(sidedata_format)
275 if i >= self._lgt:
262 if i >= self._lgt:
276 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
263 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
277 old = self._extra[i - self._lgt]
264 old = self._extra[i - self._lgt]
278 new = old[:64] + packed + old[64 + packed_size :]
265 new = old[:64] + packed + old[64 + packed_size :]
279 self._extra[i - self._lgt] = new
266 self._extra[i - self._lgt] = new
280 else:
267 else:
281 msg = b"cannot rewrite entries outside of this transaction"
268 msg = b"cannot rewrite entries outside of this transaction"
282 raise KeyError(msg)
269 raise KeyError(msg)
283
270
284
271
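# Editor's illustration (not part of the change): in the 96-byte v2 entry the
# sidedata offset/length pair starts at byte 64, immediately after the 64 bytes
# the layout shares with v1, so replace_sidedata_info() can splice it in place:
#
#   sidedata_format = b">Qi"    # 8-byte offset + 4-byte compressed length
#   packed = struct.pack(sidedata_format, new_offset, new_length)
#   entry = entry[:64] + packed + entry[64 + struct.calcsize(sidedata_format):]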
285 class IndexObject2(Index2Mixin, IndexObject):
272 class IndexObject2(Index2Mixin, IndexObject):
286 pass
273 pass
287
274
288
275
289 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
276 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
290 def _inline_scan(self, lgt):
277 def _inline_scan(self, lgt):
291 sidedata_length_pos = 72
278 sidedata_length_pos = 72
292 off = 0
279 off = 0
293 if lgt is not None:
280 if lgt is not None:
294 self._offsets = [0] * lgt
281 self._offsets = [0] * lgt
295 count = 0
282 count = 0
296 while off <= len(self._data) - self.index_size:
283 while off <= len(self._data) - self.index_size:
297 start = off + self.big_int_size
284 start = off + self.big_int_size
298 (data_size,) = struct.unpack(
285 (data_size,) = struct.unpack(
299 b'>i',
286 b'>i',
300 self._data[start : start + self.int_size],
287 self._data[start : start + self.int_size],
301 )
288 )
302 start = off + sidedata_length_pos
289 start = off + sidedata_length_pos
303 (side_data_size,) = struct.unpack(
290 (side_data_size,) = struct.unpack(
304 b'>i', self._data[start : start + self.int_size]
291 b'>i', self._data[start : start + self.int_size]
305 )
292 )
306 if lgt is not None:
293 if lgt is not None:
307 self._offsets[count] = off
294 self._offsets[count] = off
308 count += 1
295 count += 1
309 off += self.index_size + data_size + side_data_size
296 off += self.index_size + data_size + side_data_size
310 if off != len(self._data):
297 if off != len(self._data):
311 raise ValueError(b"corrupted data")
298 raise ValueError(b"corrupted data")
312 return count
299 return count
313
300
314
301
315 def parse_index_devel_nodemap(data, inline):
302 def parse_index_devel_nodemap(data, inline):
316 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
303 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
317 return PersistentNodeMapIndexObject(data), None
304 return PersistentNodeMapIndexObject(data), None
318
305
319
306
320 def parse_dirstate(dmap, copymap, st):
307 def parse_dirstate(dmap, copymap, st):
321 parents = [st[:20], st[20:40]]
308 parents = [st[:20], st[20:40]]
322 # dereference fields so they will be local in loop
309 # dereference fields so they will be local in loop
323 format = b">cllll"
310 format = b">cllll"
324 e_size = struct.calcsize(format)
311 e_size = struct.calcsize(format)
325 pos1 = 40
312 pos1 = 40
326 l = len(st)
313 l = len(st)
327
314
328 # the inner loop
315 # the inner loop
329 while pos1 < l:
316 while pos1 < l:
330 pos2 = pos1 + e_size
317 pos2 = pos1 + e_size
331 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
318 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
332 pos1 = pos2 + e[4]
319 pos1 = pos2 + e[4]
333 f = st[pos2:pos1]
320 f = st[pos2:pos1]
334 if b'\0' in f:
321 if b'\0' in f:
335 f, c = f.split(b'\0')
322 f, c = f.split(b'\0')
336 copymap[f] = c
323 copymap[f] = c
337 dmap[f] = e[:4]
324 dmap[f] = e[:4]
338 return parents
325 return parents
339
326
340
327
341 def pack_dirstate(dmap, copymap, pl, now):
328 def pack_dirstate(dmap, copymap, pl, now):
342 now = int(now)
329 now = int(now)
343 cs = stringio()
330 cs = stringio()
344 write = cs.write
331 write = cs.write
345 write(b"".join(pl))
332 write(b"".join(pl))
346 for f, e in pycompat.iteritems(dmap):
333 for f, e in pycompat.iteritems(dmap):
347 if e[0] == b'n' and e[3] == now:
334 if e[0] == b'n' and e[3] == now:
348 # The file was last modified "simultaneously" with the current
335 # The file was last modified "simultaneously" with the current
349 # write to dirstate (i.e. within the same second for file-
336 # write to dirstate (i.e. within the same second for file-
350 # systems with a granularity of 1 sec). This commonly happens
337 # systems with a granularity of 1 sec). This commonly happens
351 # for at least a couple of files on 'update'.
338 # for at least a couple of files on 'update'.
352 # The user could change the file without changing its size
339 # The user could change the file without changing its size
353 # within the same second. Invalidate the file's mtime in
340 # within the same second. Invalidate the file's mtime in
354 # dirstate, forcing future 'status' calls to compare the
341 # dirstate, forcing future 'status' calls to compare the
355 # contents of the file if the size is the same. This prevents
342 # contents of the file if the size is the same. This prevents
356 # mistakenly treating such files as clean.
343 # mistakenly treating such files as clean.
357 e = dirstatetuple(e[0], e[1], e[2], -1)
344 e = dirstatetuple(e[0], e[1], e[2], -1)
358 dmap[f] = e
345 dmap[f] = e
359
346
360 if f in copymap:
347 if f in copymap:
361 f = b"%s\0%s" % (f, copymap[f])
348 f = b"%s\0%s" % (f, copymap[f])
362 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
349 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
363 write(e)
350 write(e)
364 write(f)
351 write(f)
365 return cs.getvalue()
352 return cs.getvalue()
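
As a side note on the dirstate helpers above: parse_dirstate() and pack_dirstate() work on fixed-size b">cllll" records (state byte, mode, size, mtime, filename length) followed by the filename, with a copy source appended after a NUL when present. A small illustrative round trip for one entry (it omits the two 20-byte parent nodes that prefix a real dirstate file):

import struct

fmt = b">cllll"
name = b"foo/bar.txt"
record = struct.pack(fmt, b'n', 0o644, 12, 1590000000, len(name)) + name
state, mode, size, mtime, flen = struct.unpack(fmt, record[: struct.calcsize(fmt)])
assert record[struct.calcsize(fmt) :][:flen] == name
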
@@ -1,3256 +1,3252 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullhex,
29 nullhex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 sha1nodeconstants,
32 sha1nodeconstants,
33 short,
33 short,
34 wdirfilenodeids,
34 wdirfilenodeids,
35 wdirhex,
35 wdirhex,
36 wdirid,
36 wdirid,
37 wdirrev,
37 wdirrev,
38 )
38 )
39 from .i18n import _
39 from .i18n import _
40 from .pycompat import getattr
40 from .pycompat import getattr
41 from .revlogutils.constants import (
41 from .revlogutils.constants import (
42 FLAG_GENERALDELTA,
42 FLAG_GENERALDELTA,
43 FLAG_INLINE_DATA,
43 FLAG_INLINE_DATA,
44 INDEX_ENTRY_V0,
44 INDEX_ENTRY_V0,
45 INDEX_ENTRY_V1,
45 INDEX_ENTRY_V1,
46 INDEX_ENTRY_V2,
46 REVLOGV0,
47 REVLOGV0,
47 REVLOGV1,
48 REVLOGV1,
48 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
49 REVLOGV2,
50 REVLOGV2,
50 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
51 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
54 )
55 )
55 from .revlogutils.flagutil import (
56 from .revlogutils.flagutil import (
56 REVIDX_DEFAULT_FLAGS,
57 REVIDX_DEFAULT_FLAGS,
57 REVIDX_ELLIPSIS,
58 REVIDX_ELLIPSIS,
58 REVIDX_EXTSTORED,
59 REVIDX_EXTSTORED,
59 REVIDX_FLAGS_ORDER,
60 REVIDX_FLAGS_ORDER,
60 REVIDX_HASCOPIESINFO,
61 REVIDX_HASCOPIESINFO,
61 REVIDX_ISCENSORED,
62 REVIDX_ISCENSORED,
62 REVIDX_RAWTEXT_CHANGING_FLAGS,
63 REVIDX_RAWTEXT_CHANGING_FLAGS,
63 REVIDX_SIDEDATA,
64 REVIDX_SIDEDATA,
64 )
65 )
65 from .thirdparty import attr
66 from .thirdparty import attr
66 from . import (
67 from . import (
67 ancestor,
68 ancestor,
68 dagop,
69 dagop,
69 error,
70 error,
70 mdiff,
71 mdiff,
71 policy,
72 policy,
72 pycompat,
73 pycompat,
73 templatefilters,
74 templatefilters,
74 util,
75 util,
75 )
76 )
76 from .interfaces import (
77 from .interfaces import (
77 repository,
78 repository,
78 util as interfaceutil,
79 util as interfaceutil,
79 )
80 )
80 from .revlogutils import (
81 from .revlogutils import (
81 deltas as deltautil,
82 deltas as deltautil,
82 flagutil,
83 flagutil,
83 nodemap as nodemaputil,
84 nodemap as nodemaputil,
84 sidedata as sidedatautil,
85 sidedata as sidedatautil,
85 )
86 )
86 from .utils import (
87 from .utils import (
87 storageutil,
88 storageutil,
88 stringutil,
89 stringutil,
89 )
90 )
90 from .pure import parsers as pureparsers
91
91
92 # blanked usage of all the name to prevent pyflakes constraints
92 # blanked usage of all the names to prevent pyflakes constraints
92 # blanked usage of all the names to prevent pyflakes constraints
93 # We need these names available in the module for extensions.
93 # We need these names available in the module for extensions.
94 REVLOGV0
95 REVLOGV1
95 REVLOGV1
96 REVLOGV2
96 REVLOGV2
97 FLAG_INLINE_DATA
97 FLAG_INLINE_DATA
98 FLAG_GENERALDELTA
98 FLAG_GENERALDELTA
99 REVLOG_DEFAULT_FLAGS
99 REVLOG_DEFAULT_FLAGS
100 REVLOG_DEFAULT_FORMAT
100 REVLOG_DEFAULT_FORMAT
101 REVLOG_DEFAULT_VERSION
101 REVLOG_DEFAULT_VERSION
102 REVLOGV1_FLAGS
102 REVLOGV1_FLAGS
103 REVLOGV2_FLAGS
103 REVLOGV2_FLAGS
104 REVIDX_ISCENSORED
104 REVIDX_ISCENSORED
105 REVIDX_ELLIPSIS
105 REVIDX_ELLIPSIS
106 REVIDX_SIDEDATA
106 REVIDX_SIDEDATA
107 REVIDX_HASCOPIESINFO
107 REVIDX_HASCOPIESINFO
108 REVIDX_EXTSTORED
108 REVIDX_EXTSTORED
109 REVIDX_DEFAULT_FLAGS
109 REVIDX_DEFAULT_FLAGS
110 REVIDX_FLAGS_ORDER
110 REVIDX_FLAGS_ORDER
111 REVIDX_RAWTEXT_CHANGING_FLAGS
111 REVIDX_RAWTEXT_CHANGING_FLAGS
112
112
113 parsers = policy.importmod('parsers')
113 parsers = policy.importmod('parsers')
114 rustancestor = policy.importrust('ancestor')
114 rustancestor = policy.importrust('ancestor')
115 rustdagop = policy.importrust('dagop')
115 rustdagop = policy.importrust('dagop')
116 rustrevlog = policy.importrust('revlog')
116 rustrevlog = policy.importrust('revlog')
117
117
118 # Aliased for performance.
118 # Aliased for performance.
119 _zlibdecompress = zlib.decompress
119 _zlibdecompress = zlib.decompress
120
120
121 # max size of revlog with inline data
121 # max size of revlog with inline data
122 _maxinline = 131072
122 _maxinline = 131072
123 _chunksize = 1048576
123 _chunksize = 1048576
124
124
125 # Flag processors for REVIDX_ELLIPSIS.
125 # Flag processors for REVIDX_ELLIPSIS.
126 def ellipsisreadprocessor(rl, text):
126 def ellipsisreadprocessor(rl, text):
127 return text, False
127 return text, False
128
128
129
129
130 def ellipsiswriteprocessor(rl, text):
130 def ellipsiswriteprocessor(rl, text):
131 return text, False
131 return text, False
132
132
133
133
134 def ellipsisrawprocessor(rl, text):
134 def ellipsisrawprocessor(rl, text):
135 return False
135 return False
136
136
137
137
138 ellipsisprocessor = (
138 ellipsisprocessor = (
139 ellipsisreadprocessor,
139 ellipsisreadprocessor,
140 ellipsiswriteprocessor,
140 ellipsiswriteprocessor,
141 ellipsisrawprocessor,
141 ellipsisrawprocessor,
142 )
142 )
143
143
144
144
145 def getoffset(q):
145 def getoffset(q):
146 return int(q >> 16)
146 return int(q >> 16)
147
147
148
148
149 def gettype(q):
149 def gettype(q):
150 return int(q & 0xFFFF)
150 return int(q & 0xFFFF)
151
151
152
152
153 def offset_type(offset, type):
153 def offset_type(offset, type):
154 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
154 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
155 raise ValueError(b'unknown revlog index flags')
155 raise ValueError(b'unknown revlog index flags')
156 return int(int(offset) << 16 | type)
156 return int(int(offset) << 16 | type)
157
157
158
158
159 def _verify_revision(rl, skipflags, state, node):
159 def _verify_revision(rl, skipflags, state, node):
160 """Verify the integrity of the given revlog ``node`` while providing a hook
160 """Verify the integrity of the given revlog ``node`` while providing a hook
161 point for extensions to influence the operation."""
161 point for extensions to influence the operation."""
162 if skipflags:
162 if skipflags:
163 state[b'skipread'].add(node)
163 state[b'skipread'].add(node)
164 else:
164 else:
165 # Side-effect: read content and verify hash.
165 # Side-effect: read content and verify hash.
166 rl.revision(node)
166 rl.revision(node)
167
167
168
168
169 # True if a fast implementation for persistent-nodemap is available
169 # True if a fast implementation for persistent-nodemap is available
170 #
170 #
171 # We also consider we have a "fast" implementation in "pure" python because
171 # We also consider we have a "fast" implementation in "pure" python because
172 # people using pure don't really have performance considerations (and a
172 # people using pure don't really have performance considerations (and a
173 # wheelbarrow of other slowness sources)
173 # wheelbarrow of other slowness sources)
174 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
174 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
175 parsers, 'BaseIndexObject'
175 parsers, 'BaseIndexObject'
176 )
176 )
177
177
178
178
179 @attr.s(slots=True, frozen=True)
179 @attr.s(slots=True, frozen=True)
180 class _revisioninfo(object):
180 class _revisioninfo(object):
181 """Information about a revision that allows building its fulltext
181 """Information about a revision that allows building its fulltext
182 node: expected hash of the revision
182 node: expected hash of the revision
183 p1, p2: parent revs of the revision
183 p1, p2: parent revs of the revision
184 btext: built text cache consisting of a one-element list
184 btext: built text cache consisting of a one-element list
185 cachedelta: (baserev, uncompressed_delta) or None
185 cachedelta: (baserev, uncompressed_delta) or None
186 flags: flags associated to the revision storage
186 flags: flags associated to the revision storage
187
187
188 One of btext[0] or cachedelta must be set.
188 One of btext[0] or cachedelta must be set.
189 """
189 """
190
190
191 node = attr.ib()
191 node = attr.ib()
192 p1 = attr.ib()
192 p1 = attr.ib()
193 p2 = attr.ib()
193 p2 = attr.ib()
194 btext = attr.ib()
194 btext = attr.ib()
195 textlen = attr.ib()
195 textlen = attr.ib()
196 cachedelta = attr.ib()
196 cachedelta = attr.ib()
197 flags = attr.ib()
197 flags = attr.ib()
198
198
199
199
200 @interfaceutil.implementer(repository.irevisiondelta)
200 @interfaceutil.implementer(repository.irevisiondelta)
201 @attr.s(slots=True)
201 @attr.s(slots=True)
202 class revlogrevisiondelta(object):
202 class revlogrevisiondelta(object):
203 node = attr.ib()
203 node = attr.ib()
204 p1node = attr.ib()
204 p1node = attr.ib()
205 p2node = attr.ib()
205 p2node = attr.ib()
206 basenode = attr.ib()
206 basenode = attr.ib()
207 flags = attr.ib()
207 flags = attr.ib()
208 baserevisionsize = attr.ib()
208 baserevisionsize = attr.ib()
209 revision = attr.ib()
209 revision = attr.ib()
210 delta = attr.ib()
210 delta = attr.ib()
211 sidedata = attr.ib()
211 sidedata = attr.ib()
212 linknode = attr.ib(default=None)
212 linknode = attr.ib(default=None)
213
213
214
214
215 @interfaceutil.implementer(repository.iverifyproblem)
215 @interfaceutil.implementer(repository.iverifyproblem)
216 @attr.s(frozen=True)
216 @attr.s(frozen=True)
217 class revlogproblem(object):
217 class revlogproblem(object):
218 warning = attr.ib(default=None)
218 warning = attr.ib(default=None)
219 error = attr.ib(default=None)
219 error = attr.ib(default=None)
220 node = attr.ib(default=None)
220 node = attr.ib(default=None)
221
221
222
222
223 class revlogoldindex(list):
223 class revlogoldindex(list):
224 @property
224 @property
225 def nodemap(self):
225 def nodemap(self):
226 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
226 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
227 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
227 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
228 return self._nodemap
228 return self._nodemap
229
229
230 @util.propertycache
230 @util.propertycache
231 def _nodemap(self):
231 def _nodemap(self):
232 nodemap = nodemaputil.NodeMap({nullid: nullrev})
232 nodemap = nodemaputil.NodeMap({nullid: nullrev})
233 for r in range(0, len(self)):
233 for r in range(0, len(self)):
234 n = self[r][7]
234 n = self[r][7]
235 nodemap[n] = r
235 nodemap[n] = r
236 return nodemap
236 return nodemap
237
237
238 def has_node(self, node):
238 def has_node(self, node):
239 """return True if the node exist in the index"""
239 """return True if the node exist in the index"""
240 return node in self._nodemap
240 return node in self._nodemap
241
241
242 def rev(self, node):
242 def rev(self, node):
243 """return a revision for a node
243 """return a revision for a node
244
244
245 If the node is unknown, raise a RevlogError"""
245 If the node is unknown, raise a RevlogError"""
246 return self._nodemap[node]
246 return self._nodemap[node]
247
247
248 def get_rev(self, node):
248 def get_rev(self, node):
249 """return a revision for a node
249 """return a revision for a node
250
250
251 If the node is unknown, return None"""
251 If the node is unknown, return None"""
252 return self._nodemap.get(node)
252 return self._nodemap.get(node)
253
253
254 def append(self, tup):
254 def append(self, tup):
255 self._nodemap[tup[7]] = len(self)
255 self._nodemap[tup[7]] = len(self)
256 super(revlogoldindex, self).append(tup)
256 super(revlogoldindex, self).append(tup)
257
257
258 def __delitem__(self, i):
258 def __delitem__(self, i):
259 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
259 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
260 raise ValueError(b"deleting slices only supports a:-1 with step 1")
260 raise ValueError(b"deleting slices only supports a:-1 with step 1")
261 for r in pycompat.xrange(i.start, len(self)):
261 for r in pycompat.xrange(i.start, len(self)):
262 del self._nodemap[self[r][7]]
262 del self._nodemap[self[r][7]]
263 super(revlogoldindex, self).__delitem__(i)
263 super(revlogoldindex, self).__delitem__(i)
264
264
265 def clearcaches(self):
265 def clearcaches(self):
266 self.__dict__.pop('_nodemap', None)
266 self.__dict__.pop('_nodemap', None)
267
267
268 def __getitem__(self, i):
268 def __getitem__(self, i):
269 if i == -1:
269 if i == -1:
270 return (0, 0, 0, -1, -1, -1, -1, nullid)
270 return (0, 0, 0, -1, -1, -1, -1, nullid)
271 return list.__getitem__(self, i)
271 return list.__getitem__(self, i)
272
272
273
273
274 class revlogoldio(object):
274 class revlogoldio(object):
275 def __init__(self):
275 def __init__(self):
276 self.size = INDEX_ENTRY_V0.size
276 self.size = INDEX_ENTRY_V0.size
277
277
278 def parseindex(self, data, inline):
278 def parseindex(self, data, inline):
279 s = self.size
279 s = self.size
280 index = []
280 index = []
281 nodemap = nodemaputil.NodeMap({nullid: nullrev})
281 nodemap = nodemaputil.NodeMap({nullid: nullrev})
282 n = off = 0
282 n = off = 0
283 l = len(data)
283 l = len(data)
284 while off + s <= l:
284 while off + s <= l:
285 cur = data[off : off + s]
285 cur = data[off : off + s]
286 off += s
286 off += s
287 e = INDEX_ENTRY_V0.unpack(cur)
287 e = INDEX_ENTRY_V0.unpack(cur)
288 # transform to revlogv1 format
288 # transform to revlogv1 format
289 e2 = (
289 e2 = (
290 offset_type(e[0], 0),
290 offset_type(e[0], 0),
291 e[1],
291 e[1],
292 -1,
292 -1,
293 e[2],
293 e[2],
294 e[3],
294 e[3],
295 nodemap.get(e[4], nullrev),
295 nodemap.get(e[4], nullrev),
296 nodemap.get(e[5], nullrev),
296 nodemap.get(e[5], nullrev),
297 e[6],
297 e[6],
298 )
298 )
299 index.append(e2)
299 index.append(e2)
300 nodemap[e[6]] = n
300 nodemap[e[6]] = n
301 n += 1
301 n += 1
302
302
303 index = revlogoldindex(index)
303 index = revlogoldindex(index)
304 return index, None
304 return index, None
305
305
306 def packentry(self, entry, node, version, rev):
306 def packentry(self, entry, node, version, rev):
307 """return the binary representation of an entry
307 """return the binary representation of an entry
308
308
309 entry: a tuple containing all the values (see index.__getitem__)
309 entry: a tuple containing all the values (see index.__getitem__)
310 node: a callback to convert a revision to nodeid
310 node: a callback to convert a revision to nodeid
311 version: the changelog version
311 version: the changelog version
312 rev: the revision number
312 rev: the revision number
313 """
313 """
314 if gettype(entry[0]):
314 if gettype(entry[0]):
315 raise error.RevlogError(
315 raise error.RevlogError(
316 _(b'index entry flags need revlog version 1')
316 _(b'index entry flags need revlog version 1')
317 )
317 )
318 e2 = (
318 e2 = (
319 getoffset(entry[0]),
319 getoffset(entry[0]),
320 entry[1],
320 entry[1],
321 entry[3],
321 entry[3],
322 entry[4],
322 entry[4],
323 node(entry[5]),
323 node(entry[5]),
324 node(entry[6]),
324 node(entry[6]),
325 entry[7],
325 entry[7],
326 )
326 )
327 return INDEX_ENTRY_V0.pack(*e2)
327 return INDEX_ENTRY_V0.pack(*e2)
328
328
329
329
330 versionformat = struct.Struct(b">I")
330 versionformat = struct.Struct(b">I")
331 versionformat_pack = versionformat.pack
331 versionformat_pack = versionformat.pack
332 versionformat_unpack = versionformat.unpack
332 versionformat_unpack = versionformat.unpack
333
333
334 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
334 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
335 # signed integer)
335 # signed integer)
336 _maxentrysize = 0x7FFFFFFF
336 _maxentrysize = 0x7FFFFFFF
337
337
338
338
339 class revlogio(object):
339 class revlogio(object):
340 def __init__(self):
340 def __init__(self):
341 self.size = INDEX_ENTRY_V1.size
341 self.size = INDEX_ENTRY_V1.size
342
342
343 def parseindex(self, data, inline):
343 def parseindex(self, data, inline):
344 # call the C implementation to parse the index data
344 # call the C implementation to parse the index data
345 index, cache = parsers.parse_index2(data, inline)
345 index, cache = parsers.parse_index2(data, inline)
346 return index, cache
346 return index, cache
347
347
348 def packentry(self, entry, node, version, rev):
348 def packentry(self, entry, node, version, rev):
349 p = INDEX_ENTRY_V1.pack(*entry)
349 p = INDEX_ENTRY_V1.pack(*entry)
350 if rev == 0:
350 if rev == 0:
351 p = versionformat_pack(version) + p[4:]
351 p = versionformat_pack(version) + p[4:]
352 return p
352 return p
353
353
354
354
355 indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
356 indexformatv2_pack = indexformatv2.pack
357
358
359 class revlogv2io(object):
355 class revlogv2io(object):
360 def __init__(self):
356 def __init__(self):
361 self.size = indexformatv2.size
357 self.size = INDEX_ENTRY_V2.size
362
358
363 def parseindex(self, data, inline):
359 def parseindex(self, data, inline):
364 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
360 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
365 return index, cache
361 return index, cache
366
362
367 def packentry(self, entry, node, version, rev):
363 def packentry(self, entry, node, version, rev):
368 p = indexformatv2_pack(*entry)
364 p = INDEX_ENTRY_V2.pack(*entry)
369 if rev == 0:
365 if rev == 0:
370 p = versionformat_pack(version) + p[4:]
366 p = versionformat_pack(version) + p[4:]
371 return p
367 return p
372
368
373
369
374 NodemapRevlogIO = None
370 NodemapRevlogIO = None
375
371
376 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
372 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
377
373
378 class NodemapRevlogIO(revlogio):
374 class NodemapRevlogIO(revlogio):
379 """A debug oriented IO class that return a PersistentNodeMapIndexObject
375 """A debug oriented IO class that return a PersistentNodeMapIndexObject
380
376
381 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
377 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
382 """
378 """
383
379
384 def parseindex(self, data, inline):
380 def parseindex(self, data, inline):
385 index, cache = parsers.parse_index_devel_nodemap(data, inline)
381 index, cache = parsers.parse_index_devel_nodemap(data, inline)
386 return index, cache
382 return index, cache
387
383
388
384
389 class rustrevlogio(revlogio):
385 class rustrevlogio(revlogio):
390 def parseindex(self, data, inline):
386 def parseindex(self, data, inline):
391 index, cache = super(rustrevlogio, self).parseindex(data, inline)
387 index, cache = super(rustrevlogio, self).parseindex(data, inline)
392 return rustrevlog.MixedIndex(index), cache
388 return rustrevlog.MixedIndex(index), cache
393
389
394
390
395 class revlog(object):
391 class revlog(object):
396 """
392 """
397 the underlying revision storage object
393 the underlying revision storage object
398
394
399 A revlog consists of two parts, an index and the revision data.
395 A revlog consists of two parts, an index and the revision data.
400
396
401 The index is a file with a fixed record size containing
397 The index is a file with a fixed record size containing
402 information on each revision, including its nodeid (hash), the
398 information on each revision, including its nodeid (hash), the
403 nodeids of its parents, the position and offset of its data within
399 nodeids of its parents, the position and offset of its data within
404 the data file, and the revision it's based on. Finally, each entry
400 the data file, and the revision it's based on. Finally, each entry
405 contains a linkrev entry that can serve as a pointer to external
401 contains a linkrev entry that can serve as a pointer to external
406 data.
402 data.
407
403
408 The revision data itself is a linear collection of data chunks.
404 The revision data itself is a linear collection of data chunks.
409 Each chunk represents a revision and is usually represented as a
405 Each chunk represents a revision and is usually represented as a
410 delta against the previous chunk. To bound lookup time, runs of
406 delta against the previous chunk. To bound lookup time, runs of
411 deltas are limited to about 2 times the length of the original
407 deltas are limited to about 2 times the length of the original
412 version data. This makes retrieval of a version proportional to
408 version data. This makes retrieval of a version proportional to
413 its size, or O(1) relative to the number of revisions.
409 its size, or O(1) relative to the number of revisions.
414
410
415 Both pieces of the revlog are written to in an append-only
411 Both pieces of the revlog are written to in an append-only
416 fashion, which means we never need to rewrite a file to insert or
412 fashion, which means we never need to rewrite a file to insert or
417 remove data, and can use some simple techniques to avoid the need
413 remove data, and can use some simple techniques to avoid the need
418 for locking while reading.
414 for locking while reading.
419
415
420 If checkambig, indexfile is opened with checkambig=True at
416 If checkambig, indexfile is opened with checkambig=True at
421 writing, to avoid file stat ambiguity.
417 writing, to avoid file stat ambiguity.
422
418
423 If mmaplargeindex is True, and an mmapindexthreshold is set, the
419 If mmaplargeindex is True, and an mmapindexthreshold is set, the
424 index will be mmapped rather than read if it is larger than the
420 index will be mmapped rather than read if it is larger than the
425 configured threshold.
421 configured threshold.
426
422
427 If censorable is True, the revlog can have censored revisions.
423 If censorable is True, the revlog can have censored revisions.
428
424
429 If `upperboundcomp` is not None, this is the expected maximal gain from
425 If `upperboundcomp` is not None, this is the expected maximal gain from
430 compression for the data content.
426 compression for the data content.
431
427
432 `concurrencychecker` is an optional function that receives 3 arguments: a
428 `concurrencychecker` is an optional function that receives 3 arguments: a
433 file handle, a filename, and an expected position. It should check whether
429 file handle, a filename, and an expected position. It should check whether
434 the current position in the file handle is valid, and log/warn/fail (by
430 the current position in the file handle is valid, and log/warn/fail (by
435 raising).
431 raising).
436 """
432 """
437
433
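# Editor's sketch of typical use, with an illustrative opener and file name
# (neither appears in this diff):
#
#   rl = revlog(vfs_opener, indexfile=b'00changelog.i')
#   node = rl.node(0)          # nodeid of the first revision
#   text = rl.revision(node)   # fulltext, reconstructed from the delta chain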
438 _flagserrorclass = error.RevlogError
434 _flagserrorclass = error.RevlogError
439
435
440 def __init__(
436 def __init__(
441 self,
437 self,
442 opener,
438 opener,
443 indexfile,
439 indexfile,
444 datafile=None,
440 datafile=None,
445 checkambig=False,
441 checkambig=False,
446 mmaplargeindex=False,
442 mmaplargeindex=False,
447 censorable=False,
443 censorable=False,
448 upperboundcomp=None,
444 upperboundcomp=None,
449 persistentnodemap=False,
445 persistentnodemap=False,
450 concurrencychecker=None,
446 concurrencychecker=None,
451 ):
447 ):
452 """
448 """
453 create a revlog object
449 create a revlog object
454
450
455 opener is a function that abstracts the file opening operation
451 opener is a function that abstracts the file opening operation
456 and can be used to implement COW semantics or the like.
452 and can be used to implement COW semantics or the like.
457
453
458 """
454 """
459 self.upperboundcomp = upperboundcomp
455 self.upperboundcomp = upperboundcomp
460 self.indexfile = indexfile
456 self.indexfile = indexfile
461 self.datafile = datafile or (indexfile[:-2] + b".d")
457 self.datafile = datafile or (indexfile[:-2] + b".d")
462 self.nodemap_file = None
458 self.nodemap_file = None
463 if persistentnodemap:
459 if persistentnodemap:
464 self.nodemap_file = nodemaputil.get_nodemap_file(
460 self.nodemap_file = nodemaputil.get_nodemap_file(
465 opener, self.indexfile
461 opener, self.indexfile
466 )
462 )
467
463
468 self.opener = opener
464 self.opener = opener
469 # When True, indexfile is opened with checkambig=True at writing, to
465 # When True, indexfile is opened with checkambig=True at writing, to
470 # avoid file stat ambiguity.
466 # avoid file stat ambiguity.
471 self._checkambig = checkambig
467 self._checkambig = checkambig
472 self._mmaplargeindex = mmaplargeindex
468 self._mmaplargeindex = mmaplargeindex
473 self._censorable = censorable
469 self._censorable = censorable
474 # 3-tuple of (node, rev, text) for a raw revision.
470 # 3-tuple of (node, rev, text) for a raw revision.
475 self._revisioncache = None
471 self._revisioncache = None
476 # Maps rev to chain base rev.
472 # Maps rev to chain base rev.
477 self._chainbasecache = util.lrucachedict(100)
473 self._chainbasecache = util.lrucachedict(100)
478 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
474 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
479 self._chunkcache = (0, b'')
475 self._chunkcache = (0, b'')
480 # How much data to read and cache into the raw revlog data cache.
476 # How much data to read and cache into the raw revlog data cache.
481 self._chunkcachesize = 65536
477 self._chunkcachesize = 65536
482 self._maxchainlen = None
478 self._maxchainlen = None
483 self._deltabothparents = True
479 self._deltabothparents = True
484 self.index = None
480 self.index = None
485 self._nodemap_docket = None
481 self._nodemap_docket = None
486 # Mapping of partial identifiers to full nodes.
482 # Mapping of partial identifiers to full nodes.
487 self._pcache = {}
483 self._pcache = {}
488 # Mapping of revision integer to full node.
484 # Mapping of revision integer to full node.
489 self._compengine = b'zlib'
485 self._compengine = b'zlib'
490 self._compengineopts = {}
486 self._compengineopts = {}
491 self._maxdeltachainspan = -1
487 self._maxdeltachainspan = -1
492 self._withsparseread = False
488 self._withsparseread = False
493 self._sparserevlog = False
489 self._sparserevlog = False
494 self._srdensitythreshold = 0.50
490 self._srdensitythreshold = 0.50
495 self._srmingapsize = 262144
491 self._srmingapsize = 262144
496
492
497 # Make copy of flag processors so each revlog instance can support
493 # Make copy of flag processors so each revlog instance can support
498 # custom flags.
494 # custom flags.
499 self._flagprocessors = dict(flagutil.flagprocessors)
495 self._flagprocessors = dict(flagutil.flagprocessors)
500
496
501 # 2-tuple of file handles being used for active writing.
497 # 2-tuple of file handles being used for active writing.
502 self._writinghandles = None
498 self._writinghandles = None
503
499
504 self._loadindex()
500 self._loadindex()
505
501
506 self._concurrencychecker = concurrencychecker
502 self._concurrencychecker = concurrencychecker
507
503
508 def _loadindex(self):
504 def _loadindex(self):
509 mmapindexthreshold = None
505 mmapindexthreshold = None
510 opts = self.opener.options
506 opts = self.opener.options
511
507
512 if b'revlogv2' in opts:
508 if b'revlogv2' in opts:
513 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
509 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
514 elif b'revlogv1' in opts:
510 elif b'revlogv1' in opts:
515 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
511 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
516 if b'generaldelta' in opts:
512 if b'generaldelta' in opts:
517 newversionflags |= FLAG_GENERALDELTA
513 newversionflags |= FLAG_GENERALDELTA
518 elif b'revlogv0' in self.opener.options:
514 elif b'revlogv0' in self.opener.options:
519 newversionflags = REVLOGV0
515 newversionflags = REVLOGV0
520 else:
516 else:
521 newversionflags = REVLOG_DEFAULT_VERSION
517 newversionflags = REVLOG_DEFAULT_VERSION
522
518
523 if b'chunkcachesize' in opts:
519 if b'chunkcachesize' in opts:
524 self._chunkcachesize = opts[b'chunkcachesize']
520 self._chunkcachesize = opts[b'chunkcachesize']
525 if b'maxchainlen' in opts:
521 if b'maxchainlen' in opts:
526 self._maxchainlen = opts[b'maxchainlen']
522 self._maxchainlen = opts[b'maxchainlen']
527 if b'deltabothparents' in opts:
523 if b'deltabothparents' in opts:
528 self._deltabothparents = opts[b'deltabothparents']
524 self._deltabothparents = opts[b'deltabothparents']
529 self._lazydelta = bool(opts.get(b'lazydelta', True))
525 self._lazydelta = bool(opts.get(b'lazydelta', True))
530 self._lazydeltabase = False
526 self._lazydeltabase = False
531 if self._lazydelta:
527 if self._lazydelta:
532 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
528 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
533 if b'compengine' in opts:
529 if b'compengine' in opts:
534 self._compengine = opts[b'compengine']
530 self._compengine = opts[b'compengine']
535 if b'zlib.level' in opts:
531 if b'zlib.level' in opts:
536 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
532 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
537 if b'zstd.level' in opts:
533 if b'zstd.level' in opts:
538 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
534 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
539 if b'maxdeltachainspan' in opts:
535 if b'maxdeltachainspan' in opts:
540 self._maxdeltachainspan = opts[b'maxdeltachainspan']
536 self._maxdeltachainspan = opts[b'maxdeltachainspan']
541 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
537 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
542 mmapindexthreshold = opts[b'mmapindexthreshold']
538 mmapindexthreshold = opts[b'mmapindexthreshold']
543 self.hassidedata = bool(opts.get(b'side-data', False))
539 self.hassidedata = bool(opts.get(b'side-data', False))
544 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
540 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
545 withsparseread = bool(opts.get(b'with-sparse-read', False))
541 withsparseread = bool(opts.get(b'with-sparse-read', False))
546 # sparse-revlog forces sparse-read
542 # sparse-revlog forces sparse-read
547 self._withsparseread = self._sparserevlog or withsparseread
543 self._withsparseread = self._sparserevlog or withsparseread
548 if b'sparse-read-density-threshold' in opts:
544 if b'sparse-read-density-threshold' in opts:
549 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
545 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
550 if b'sparse-read-min-gap-size' in opts:
546 if b'sparse-read-min-gap-size' in opts:
551 self._srmingapsize = opts[b'sparse-read-min-gap-size']
547 self._srmingapsize = opts[b'sparse-read-min-gap-size']
552 if opts.get(b'enableellipsis'):
548 if opts.get(b'enableellipsis'):
553 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
549 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
554
550
555 # revlog v0 doesn't have flag processors
551 # revlog v0 doesn't have flag processors
556 for flag, processor in pycompat.iteritems(
552 for flag, processor in pycompat.iteritems(
557 opts.get(b'flagprocessors', {})
553 opts.get(b'flagprocessors', {})
558 ):
554 ):
559 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
555 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
560
556
561 if self._chunkcachesize <= 0:
557 if self._chunkcachesize <= 0:
562 raise error.RevlogError(
558 raise error.RevlogError(
563 _(b'revlog chunk cache size %r is not greater than 0')
559 _(b'revlog chunk cache size %r is not greater than 0')
564 % self._chunkcachesize
560 % self._chunkcachesize
565 )
561 )
566 elif self._chunkcachesize & (self._chunkcachesize - 1):
562 elif self._chunkcachesize & (self._chunkcachesize - 1):
567 raise error.RevlogError(
563 raise error.RevlogError(
568 _(b'revlog chunk cache size %r is not a power of 2')
564 _(b'revlog chunk cache size %r is not a power of 2')
569 % self._chunkcachesize
565 % self._chunkcachesize
570 )
566 )
571
567
572 indexdata = b''
568 indexdata = b''
573 self._initempty = True
569 self._initempty = True
574 try:
570 try:
575 with self._indexfp() as f:
571 with self._indexfp() as f:
576 if (
572 if (
577 mmapindexthreshold is not None
573 mmapindexthreshold is not None
578 and self.opener.fstat(f).st_size >= mmapindexthreshold
574 and self.opener.fstat(f).st_size >= mmapindexthreshold
579 ):
575 ):
580 # TODO: should .close() to release resources without
576 # TODO: should .close() to release resources without
581 # relying on Python GC
577 # relying on Python GC
582 indexdata = util.buffer(util.mmapread(f))
578 indexdata = util.buffer(util.mmapread(f))
583 else:
579 else:
584 indexdata = f.read()
580 indexdata = f.read()
585 if len(indexdata) > 0:
581 if len(indexdata) > 0:
586 versionflags = versionformat_unpack(indexdata[:4])[0]
582 versionflags = versionformat_unpack(indexdata[:4])[0]
587 self._initempty = False
583 self._initempty = False
588 else:
584 else:
589 versionflags = newversionflags
585 versionflags = newversionflags
590 except IOError as inst:
586 except IOError as inst:
591 if inst.errno != errno.ENOENT:
587 if inst.errno != errno.ENOENT:
592 raise
588 raise
593
589
594 versionflags = newversionflags
590 versionflags = newversionflags
595
591
596 self.version = versionflags
592 self.version = versionflags
597
593
598 flags = versionflags & ~0xFFFF
594 flags = versionflags & ~0xFFFF
599 fmt = versionflags & 0xFFFF
595 fmt = versionflags & 0xFFFF
600
596
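# Editor's illustration: the first 4 bytes of the index double as a header.
# versionformat_unpack() reads them as one big-endian uint32; the low 16 bits
# hold the format number and the high 16 bits hold feature flags, e.g.:
#
#   versionflags = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
#   fmt = versionflags & 0xFFFF       # -> REVLOGV1
#   flags = versionflags & ~0xFFFF    # -> FLAG_INLINE_DATA | FLAG_GENERALDELTA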
601 if fmt == REVLOGV0:
597 if fmt == REVLOGV0:
602 if flags:
598 if flags:
603 raise error.RevlogError(
599 raise error.RevlogError(
604 _(b'unknown flags (%#04x) in version %d revlog %s')
600 _(b'unknown flags (%#04x) in version %d revlog %s')
605 % (flags >> 16, fmt, self.indexfile)
601 % (flags >> 16, fmt, self.indexfile)
606 )
602 )
607
603
608 self._inline = False
604 self._inline = False
609 self._generaldelta = False
605 self._generaldelta = False
610
606
611 elif fmt == REVLOGV1:
607 elif fmt == REVLOGV1:
612 if flags & ~REVLOGV1_FLAGS:
608 if flags & ~REVLOGV1_FLAGS:
613 raise error.RevlogError(
609 raise error.RevlogError(
614 _(b'unknown flags (%#04x) in version %d revlog %s')
610 _(b'unknown flags (%#04x) in version %d revlog %s')
615 % (flags >> 16, fmt, self.indexfile)
611 % (flags >> 16, fmt, self.indexfile)
616 )
612 )
617
613
618 self._inline = versionflags & FLAG_INLINE_DATA
614 self._inline = versionflags & FLAG_INLINE_DATA
619 self._generaldelta = versionflags & FLAG_GENERALDELTA
615 self._generaldelta = versionflags & FLAG_GENERALDELTA
620
616
621 elif fmt == REVLOGV2:
617 elif fmt == REVLOGV2:
622 if flags & ~REVLOGV2_FLAGS:
618 if flags & ~REVLOGV2_FLAGS:
623 raise error.RevlogError(
619 raise error.RevlogError(
624 _(b'unknown flags (%#04x) in version %d revlog %s')
620 _(b'unknown flags (%#04x) in version %d revlog %s')
625 % (flags >> 16, fmt, self.indexfile)
621 % (flags >> 16, fmt, self.indexfile)
626 )
622 )
627
623
628 # There is a bug in the transaction handling when going from an
624 # There is a bug in the transaction handling when going from an
629 # inline revlog to a separate index and data file. Turn it off until
625 # inline revlog to a separate index and data file. Turn it off until
630 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
626 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
631 # See issue6485
627 # See issue6485
632 self._inline = False
628 self._inline = False
633 # generaldelta implied by version 2 revlogs.
629 # generaldelta implied by version 2 revlogs.
634 self._generaldelta = True
630 self._generaldelta = True
635
631
636 else:
632 else:
637 raise error.RevlogError(
633 raise error.RevlogError(
638 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
634 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
639 )
635 )
640
636
641 self.nodeconstants = sha1nodeconstants
637 self.nodeconstants = sha1nodeconstants
642 self.nullid = self.nodeconstants.nullid
638 self.nullid = self.nodeconstants.nullid
643
639
644 # sparse-revlog can't be on without general-delta (issue6056)
640 # sparse-revlog can't be on without general-delta (issue6056)
645 if not self._generaldelta:
641 if not self._generaldelta:
646 self._sparserevlog = False
642 self._sparserevlog = False
647
643
648 self._storedeltachains = True
644 self._storedeltachains = True
649
645
650 devel_nodemap = (
646 devel_nodemap = (
651 self.nodemap_file
647 self.nodemap_file
652 and opts.get(b'devel-force-nodemap', False)
648 and opts.get(b'devel-force-nodemap', False)
653 and NodemapRevlogIO is not None
649 and NodemapRevlogIO is not None
654 )
650 )
655
651
656 use_rust_index = False
652 use_rust_index = False
657 if rustrevlog is not None:
653 if rustrevlog is not None:
658 if self.nodemap_file is not None:
654 if self.nodemap_file is not None:
659 use_rust_index = True
655 use_rust_index = True
660 else:
656 else:
661 use_rust_index = self.opener.options.get(b'rust.index')
657 use_rust_index = self.opener.options.get(b'rust.index')
662
658
663 self._io = revlogio()
659 self._io = revlogio()
664 if self.version == REVLOGV0:
660 if self.version == REVLOGV0:
665 self._io = revlogoldio()
661 self._io = revlogoldio()
666 elif fmt == REVLOGV2:
662 elif fmt == REVLOGV2:
667 self._io = revlogv2io()
663 self._io = revlogv2io()
668 elif devel_nodemap:
664 elif devel_nodemap:
669 self._io = NodemapRevlogIO()
665 self._io = NodemapRevlogIO()
670 elif use_rust_index:
666 elif use_rust_index:
671 self._io = rustrevlogio()
667 self._io = rustrevlogio()
672 try:
668 try:
673 d = self._io.parseindex(indexdata, self._inline)
669 d = self._io.parseindex(indexdata, self._inline)
674 index, _chunkcache = d
670 index, _chunkcache = d
675 use_nodemap = (
671 use_nodemap = (
676 not self._inline
672 not self._inline
677 and self.nodemap_file is not None
673 and self.nodemap_file is not None
678 and util.safehasattr(index, 'update_nodemap_data')
674 and util.safehasattr(index, 'update_nodemap_data')
679 )
675 )
680 if use_nodemap:
676 if use_nodemap:
681 nodemap_data = nodemaputil.persisted_data(self)
677 nodemap_data = nodemaputil.persisted_data(self)
682 if nodemap_data is not None:
678 if nodemap_data is not None:
683 docket = nodemap_data[0]
679 docket = nodemap_data[0]
684 if (
680 if (
685 len(d[0]) > docket.tip_rev
681 len(d[0]) > docket.tip_rev
686 and d[0][docket.tip_rev][7] == docket.tip_node
682 and d[0][docket.tip_rev][7] == docket.tip_node
687 ):
683 ):
688 # no changelog tampering
684 # no changelog tampering
689 self._nodemap_docket = docket
685 self._nodemap_docket = docket
690 index.update_nodemap_data(*nodemap_data)
686 index.update_nodemap_data(*nodemap_data)
691 except (ValueError, IndexError):
687 except (ValueError, IndexError):
692 raise error.RevlogError(
688 raise error.RevlogError(
693 _(b"index %s is corrupted") % self.indexfile
689 _(b"index %s is corrupted") % self.indexfile
694 )
690 )
695 self.index, self._chunkcache = d
691 self.index, self._chunkcache = d
696 if not self._chunkcache:
692 if not self._chunkcache:
697 self._chunkclear()
693 self._chunkclear()
698 # revnum -> (chain-length, sum-delta-length)
694 # revnum -> (chain-length, sum-delta-length)
699 self._chaininfocache = util.lrucachedict(500)
695 self._chaininfocache = util.lrucachedict(500)
700 # revlog header -> revlog compressor
696 # revlog header -> revlog compressor
701 self._decompressors = {}
697 self._decompressors = {}
702
698
703 @util.propertycache
699 @util.propertycache
704 def _compressor(self):
700 def _compressor(self):
705 engine = util.compengines[self._compengine]
701 engine = util.compengines[self._compengine]
706 return engine.revlogcompressor(self._compengineopts)
702 return engine.revlogcompressor(self._compengineopts)
707
703
708 def _indexfp(self, mode=b'r'):
704 def _indexfp(self, mode=b'r'):
709 """file object for the revlog's index file"""
705 """file object for the revlog's index file"""
710 args = {'mode': mode}
706 args = {'mode': mode}
711 if mode != b'r':
707 if mode != b'r':
712 args['checkambig'] = self._checkambig
708 args['checkambig'] = self._checkambig
713 if mode == b'w':
709 if mode == b'w':
714 args['atomictemp'] = True
710 args['atomictemp'] = True
715 return self.opener(self.indexfile, **args)
711 return self.opener(self.indexfile, **args)
716
712
717 def _datafp(self, mode=b'r'):
713 def _datafp(self, mode=b'r'):
718 """file object for the revlog's data file"""
714 """file object for the revlog's data file"""
719 return self.opener(self.datafile, mode=mode)
715 return self.opener(self.datafile, mode=mode)
720
716
721 @contextlib.contextmanager
717 @contextlib.contextmanager
722 def _datareadfp(self, existingfp=None):
718 def _datareadfp(self, existingfp=None):
723 """file object suitable to read data"""
719 """file object suitable to read data"""
724 # Use explicit file handle, if given.
720 # Use explicit file handle, if given.
725 if existingfp is not None:
721 if existingfp is not None:
726 yield existingfp
722 yield existingfp
727
723
728 # Use a file handle being actively used for writes, if available.
724 # Use a file handle being actively used for writes, if available.
729 # There is some danger to doing this because reads will seek the
725 # There is some danger to doing this because reads will seek the
730 # file. However, _writeentry() performs a SEEK_END before all writes,
726 # file. However, _writeentry() performs a SEEK_END before all writes,
731 # so we should be safe.
727 # so we should be safe.
732 elif self._writinghandles:
728 elif self._writinghandles:
733 if self._inline:
729 if self._inline:
734 yield self._writinghandles[0]
730 yield self._writinghandles[0]
735 else:
731 else:
736 yield self._writinghandles[1]
732 yield self._writinghandles[1]
737
733
738 # Otherwise open a new file handle.
734 # Otherwise open a new file handle.
739 else:
735 else:
740 if self._inline:
736 if self._inline:
741 func = self._indexfp
737 func = self._indexfp
742 else:
738 else:
743 func = self._datafp
739 func = self._datafp
744 with func() as fp:
740 with func() as fp:
745 yield fp
741 yield fp
746
742
747 def tiprev(self):
743 def tiprev(self):
748 return len(self.index) - 1
744 return len(self.index) - 1
749
745
750 def tip(self):
746 def tip(self):
751 return self.node(self.tiprev())
747 return self.node(self.tiprev())
752
748
753 def __contains__(self, rev):
749 def __contains__(self, rev):
754 return 0 <= rev < len(self)
750 return 0 <= rev < len(self)
755
751
756 def __len__(self):
752 def __len__(self):
757 return len(self.index)
753 return len(self.index)
758
754
759 def __iter__(self):
755 def __iter__(self):
760 return iter(pycompat.xrange(len(self)))
756 return iter(pycompat.xrange(len(self)))
761
757
762 def revs(self, start=0, stop=None):
758 def revs(self, start=0, stop=None):
763 """iterate over all rev in this revlog (from start to stop)"""
759 """iterate over all rev in this revlog (from start to stop)"""
764 return storageutil.iterrevs(len(self), start=start, stop=stop)
760 return storageutil.iterrevs(len(self), start=start, stop=stop)
765
761
766 @property
762 @property
767 def nodemap(self):
763 def nodemap(self):
768 msg = (
764 msg = (
769 b"revlog.nodemap is deprecated, "
765 b"revlog.nodemap is deprecated, "
770 b"use revlog.index.[has_node|rev|get_rev]"
766 b"use revlog.index.[has_node|rev|get_rev]"
771 )
767 )
772 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
768 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
773 return self.index.nodemap
769 return self.index.nodemap
774
770
775 @property
771 @property
776 def _nodecache(self):
772 def _nodecache(self):
777 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
773 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
778 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
774 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
779 return self.index.nodemap
775 return self.index.nodemap
780
776
781 def hasnode(self, node):
777 def hasnode(self, node):
782 try:
778 try:
783 self.rev(node)
779 self.rev(node)
784 return True
780 return True
785 except KeyError:
781 except KeyError:
786 return False
782 return False
787
783
788 def candelta(self, baserev, rev):
784 def candelta(self, baserev, rev):
789 """whether two revisions (baserev, rev) can be delta-ed or not"""
785 """whether two revisions (baserev, rev) can be delta-ed or not"""
790 # Disable delta if either rev requires a content-changing flag
786 # Disable delta if either rev requires a content-changing flag
791 # processor (ex. LFS). This is because such flag processor can alter
787 # processor (ex. LFS). This is because such flag processor can alter
792 # the rawtext content that the delta will be based on, and two clients
788 # the rawtext content that the delta will be based on, and two clients
793 # could have a same revlog node with different flags (i.e. different
789 # could have a same revlog node with different flags (i.e. different
794 # rawtext contents) and the delta could be incompatible.
790 # rawtext contents) and the delta could be incompatible.
795 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
791 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
796 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
792 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
797 ):
793 ):
798 return False
794 return False
799 return True
795 return True
800
796
801 def update_caches(self, transaction):
797 def update_caches(self, transaction):
802 if self.nodemap_file is not None:
798 if self.nodemap_file is not None:
803 if transaction is None:
799 if transaction is None:
804 nodemaputil.update_persistent_nodemap(self)
800 nodemaputil.update_persistent_nodemap(self)
805 else:
801 else:
806 nodemaputil.setup_persistent_nodemap(transaction, self)
802 nodemaputil.setup_persistent_nodemap(transaction, self)
807
803
808 def clearcaches(self):
804 def clearcaches(self):
809 self._revisioncache = None
805 self._revisioncache = None
810 self._chainbasecache.clear()
806 self._chainbasecache.clear()
811 self._chunkcache = (0, b'')
807 self._chunkcache = (0, b'')
812 self._pcache = {}
808 self._pcache = {}
813 self._nodemap_docket = None
809 self._nodemap_docket = None
814 self.index.clearcaches()
810 self.index.clearcaches()
815 # The python code is the one responsible for validating the docket, we
811 # The python code is the one responsible for validating the docket, we
816 # end up having to refresh it here.
812 # end up having to refresh it here.
817 use_nodemap = (
813 use_nodemap = (
818 not self._inline
814 not self._inline
819 and self.nodemap_file is not None
815 and self.nodemap_file is not None
820 and util.safehasattr(self.index, 'update_nodemap_data')
816 and util.safehasattr(self.index, 'update_nodemap_data')
821 )
817 )
822 if use_nodemap:
818 if use_nodemap:
823 nodemap_data = nodemaputil.persisted_data(self)
819 nodemap_data = nodemaputil.persisted_data(self)
824 if nodemap_data is not None:
820 if nodemap_data is not None:
825 self._nodemap_docket = nodemap_data[0]
821 self._nodemap_docket = nodemap_data[0]
826 self.index.update_nodemap_data(*nodemap_data)
822 self.index.update_nodemap_data(*nodemap_data)
827
823
828 def rev(self, node):
824 def rev(self, node):
829 try:
825 try:
830 return self.index.rev(node)
826 return self.index.rev(node)
831 except TypeError:
827 except TypeError:
832 raise
828 raise
833 except error.RevlogError:
829 except error.RevlogError:
834 # parsers.c radix tree lookup failed
830 # parsers.c radix tree lookup failed
835 if node == wdirid or node in wdirfilenodeids:
831 if node == wdirid or node in wdirfilenodeids:
836 raise error.WdirUnsupported
832 raise error.WdirUnsupported
837 raise error.LookupError(node, self.indexfile, _(b'no node'))
833 raise error.LookupError(node, self.indexfile, _(b'no node'))
838
834
839 # Accessors for index entries.
835 # Accessors for index entries.
840
836
841 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
837 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
842 # are flags.
838 # are flags.
843 def start(self, rev):
839 def start(self, rev):
844 return int(self.index[rev][0] >> 16)
840 return int(self.index[rev][0] >> 16)
845
841
846 def flags(self, rev):
842 def flags(self, rev):
847 return self.index[rev][0] & 0xFFFF
843 return self.index[rev][0] & 0xFFFF
848
844
849 def length(self, rev):
845 def length(self, rev):
850 return self.index[rev][1]
846 return self.index[rev][1]
851
847
852 def sidedata_length(self, rev):
848 def sidedata_length(self, rev):
853 if self.version & 0xFFFF != REVLOGV2:
849 if self.version & 0xFFFF != REVLOGV2:
854 return 0
850 return 0
855 return self.index[rev][9]
851 return self.index[rev][9]
856
852
857 def rawsize(self, rev):
853 def rawsize(self, rev):
858 """return the length of the uncompressed text for a given revision"""
854 """return the length of the uncompressed text for a given revision"""
859 l = self.index[rev][2]
855 l = self.index[rev][2]
860 if l >= 0:
856 if l >= 0:
861 return l
857 return l
862
858
863 t = self.rawdata(rev)
859 t = self.rawdata(rev)
864 return len(t)
860 return len(t)
865
861
866 def size(self, rev):
862 def size(self, rev):
867 """length of non-raw text (processed by a "read" flag processor)"""
863 """length of non-raw text (processed by a "read" flag processor)"""
868 # fast path: if no "read" flag processor could change the content,
864 # fast path: if no "read" flag processor could change the content,
869 # size is rawsize. note: ELLIPSIS is known to not change the content.
865 # size is rawsize. note: ELLIPSIS is known to not change the content.
870 flags = self.flags(rev)
866 flags = self.flags(rev)
871 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
867 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
872 return self.rawsize(rev)
868 return self.rawsize(rev)
873
869
874 return len(self.revision(rev, raw=False))
870 return len(self.revision(rev, raw=False))
875
871
876 def chainbase(self, rev):
872 def chainbase(self, rev):
877 base = self._chainbasecache.get(rev)
873 base = self._chainbasecache.get(rev)
878 if base is not None:
874 if base is not None:
879 return base
875 return base
880
876
881 index = self.index
877 index = self.index
882 iterrev = rev
878 iterrev = rev
883 base = index[iterrev][3]
879 base = index[iterrev][3]
884 while base != iterrev:
880 while base != iterrev:
885 iterrev = base
881 iterrev = base
886 base = index[iterrev][3]
882 base = index[iterrev][3]
887
883
888 self._chainbasecache[rev] = base
884 self._chainbasecache[rev] = base
889 return base
885 return base
890
886
891 def linkrev(self, rev):
887 def linkrev(self, rev):
892 return self.index[rev][4]
888 return self.index[rev][4]
893
889
894 def parentrevs(self, rev):
890 def parentrevs(self, rev):
895 try:
891 try:
896 entry = self.index[rev]
892 entry = self.index[rev]
897 except IndexError:
893 except IndexError:
898 if rev == wdirrev:
894 if rev == wdirrev:
899 raise error.WdirUnsupported
895 raise error.WdirUnsupported
900 raise
896 raise
901 if entry[5] == nullrev:
897 if entry[5] == nullrev:
902 return entry[6], entry[5]
898 return entry[6], entry[5]
903 else:
899 else:
904 return entry[5], entry[6]
900 return entry[5], entry[6]
905
901
906 # fast parentrevs(rev) where rev isn't filtered
902 # fast parentrevs(rev) where rev isn't filtered
907 _uncheckedparentrevs = parentrevs
903 _uncheckedparentrevs = parentrevs
908
904
909 def node(self, rev):
905 def node(self, rev):
910 try:
906 try:
911 return self.index[rev][7]
907 return self.index[rev][7]
912 except IndexError:
908 except IndexError:
913 if rev == wdirrev:
909 if rev == wdirrev:
914 raise error.WdirUnsupported
910 raise error.WdirUnsupported
915 raise
911 raise
916
912
917 # Derived from index values.
913 # Derived from index values.
918
914
919 def end(self, rev):
915 def end(self, rev):
920 return self.start(rev) + self.length(rev)
916 return self.start(rev) + self.length(rev)
921
917
922 def parents(self, node):
918 def parents(self, node):
923 i = self.index
919 i = self.index
924 d = i[self.rev(node)]
920 d = i[self.rev(node)]
925 # inline node() to avoid function call overhead
921 # inline node() to avoid function call overhead
926 if d[5] == nullid:
922 if d[5] == nullid:
927 return i[d[6]][7], i[d[5]][7]
923 return i[d[6]][7], i[d[5]][7]
928 else:
924 else:
929 return i[d[5]][7], i[d[6]][7]
925 return i[d[5]][7], i[d[6]][7]
930
926
931 def chainlen(self, rev):
927 def chainlen(self, rev):
932 return self._chaininfo(rev)[0]
928 return self._chaininfo(rev)[0]
933
929
934 def _chaininfo(self, rev):
930 def _chaininfo(self, rev):
935 chaininfocache = self._chaininfocache
931 chaininfocache = self._chaininfocache
936 if rev in chaininfocache:
932 if rev in chaininfocache:
937 return chaininfocache[rev]
933 return chaininfocache[rev]
938 index = self.index
934 index = self.index
939 generaldelta = self._generaldelta
935 generaldelta = self._generaldelta
940 iterrev = rev
936 iterrev = rev
941 e = index[iterrev]
937 e = index[iterrev]
942 clen = 0
938 clen = 0
943 compresseddeltalen = 0
939 compresseddeltalen = 0
944 while iterrev != e[3]:
940 while iterrev != e[3]:
945 clen += 1
941 clen += 1
946 compresseddeltalen += e[1]
942 compresseddeltalen += e[1]
947 if generaldelta:
943 if generaldelta:
948 iterrev = e[3]
944 iterrev = e[3]
949 else:
945 else:
950 iterrev -= 1
946 iterrev -= 1
951 if iterrev in chaininfocache:
947 if iterrev in chaininfocache:
952 t = chaininfocache[iterrev]
948 t = chaininfocache[iterrev]
953 clen += t[0]
949 clen += t[0]
954 compresseddeltalen += t[1]
950 compresseddeltalen += t[1]
955 break
951 break
956 e = index[iterrev]
952 e = index[iterrev]
957 else:
953 else:
958 # Add text length of base since decompressing that also takes
954 # Add text length of base since decompressing that also takes
959 # work. For cache hits the length is already included.
955 # work. For cache hits the length is already included.
960 compresseddeltalen += e[1]
956 compresseddeltalen += e[1]
961 r = (clen, compresseddeltalen)
957 r = (clen, compresseddeltalen)
962 chaininfocache[rev] = r
958 chaininfocache[rev] = r
963 return r
959 return r
964
960
965 def _deltachain(self, rev, stoprev=None):
961 def _deltachain(self, rev, stoprev=None):
966 """Obtain the delta chain for a revision.
962 """Obtain the delta chain for a revision.
967
963
968 ``stoprev`` specifies a revision to stop at. If not specified, we
964 ``stoprev`` specifies a revision to stop at. If not specified, we
969 stop at the base of the chain.
965 stop at the base of the chain.
970
966
971 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
967 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
972 revs in ascending order and ``stopped`` is a bool indicating whether
968 revs in ascending order and ``stopped`` is a bool indicating whether
973 ``stoprev`` was hit.
969 ``stoprev`` was hit.
974 """
970 """
975 # Try C implementation.
971 # Try C implementation.
976 try:
972 try:
977 return self.index.deltachain(rev, stoprev, self._generaldelta)
973 return self.index.deltachain(rev, stoprev, self._generaldelta)
978 except AttributeError:
974 except AttributeError:
979 pass
975 pass
980
976
981 chain = []
977 chain = []
982
978
983 # Alias to prevent attribute lookup in tight loop.
979 # Alias to prevent attribute lookup in tight loop.
984 index = self.index
980 index = self.index
985 generaldelta = self._generaldelta
981 generaldelta = self._generaldelta
986
982
987 iterrev = rev
983 iterrev = rev
988 e = index[iterrev]
984 e = index[iterrev]
989 while iterrev != e[3] and iterrev != stoprev:
985 while iterrev != e[3] and iterrev != stoprev:
990 chain.append(iterrev)
986 chain.append(iterrev)
991 if generaldelta:
987 if generaldelta:
992 iterrev = e[3]
988 iterrev = e[3]
993 else:
989 else:
994 iterrev -= 1
990 iterrev -= 1
995 e = index[iterrev]
991 e = index[iterrev]
996
992
997 if iterrev == stoprev:
993 if iterrev == stoprev:
998 stopped = True
994 stopped = True
999 else:
995 else:
1000 chain.append(iterrev)
996 chain.append(iterrev)
1001 stopped = False
997 stopped = False
1002
998
1003 chain.reverse()
999 chain.reverse()
1004 return chain, stopped
1000 return chain, stopped
1005
1001
1006 def ancestors(self, revs, stoprev=0, inclusive=False):
1002 def ancestors(self, revs, stoprev=0, inclusive=False):
1007 """Generate the ancestors of 'revs' in reverse revision order.
1003 """Generate the ancestors of 'revs' in reverse revision order.
1008 Does not generate revs lower than stoprev.
1004 Does not generate revs lower than stoprev.
1009
1005
1010 See the documentation for ancestor.lazyancestors for more details."""
1006 See the documentation for ancestor.lazyancestors for more details."""
1011
1007
1012 # first, make sure start revisions aren't filtered
1008 # first, make sure start revisions aren't filtered
1013 revs = list(revs)
1009 revs = list(revs)
1014 checkrev = self.node
1010 checkrev = self.node
1015 for r in revs:
1011 for r in revs:
1016 checkrev(r)
1012 checkrev(r)
1017 # and we're sure ancestors aren't filtered as well
1013 # and we're sure ancestors aren't filtered as well
1018
1014
1019 if rustancestor is not None:
1015 if rustancestor is not None:
1020 lazyancestors = rustancestor.LazyAncestors
1016 lazyancestors = rustancestor.LazyAncestors
1021 arg = self.index
1017 arg = self.index
1022 else:
1018 else:
1023 lazyancestors = ancestor.lazyancestors
1019 lazyancestors = ancestor.lazyancestors
1024 arg = self._uncheckedparentrevs
1020 arg = self._uncheckedparentrevs
1025 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1021 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1026
1022
1027 def descendants(self, revs):
1023 def descendants(self, revs):
1028 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1024 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1029
1025
1030 def findcommonmissing(self, common=None, heads=None):
1026 def findcommonmissing(self, common=None, heads=None):
1031 """Return a tuple of the ancestors of common and the ancestors of heads
1027 """Return a tuple of the ancestors of common and the ancestors of heads
1032 that are not ancestors of common. In revset terminology, we return the
1028 that are not ancestors of common. In revset terminology, we return the
1033 tuple:
1029 tuple:
1034
1030
1035 ::common, (::heads) - (::common)
1031 ::common, (::heads) - (::common)
1036
1032
1037 The list is sorted by revision number, meaning it is
1033 The list is sorted by revision number, meaning it is
1038 topologically sorted.
1034 topologically sorted.
1039
1035
1040 'heads' and 'common' are both lists of node IDs. If heads is
1036 'heads' and 'common' are both lists of node IDs. If heads is
1041 not supplied, uses all of the revlog's heads. If common is not
1037 not supplied, uses all of the revlog's heads. If common is not
1042 supplied, uses nullid."""
1038 supplied, uses nullid."""
1043 if common is None:
1039 if common is None:
1044 common = [nullid]
1040 common = [nullid]
1045 if heads is None:
1041 if heads is None:
1046 heads = self.heads()
1042 heads = self.heads()
1047
1043
1048 common = [self.rev(n) for n in common]
1044 common = [self.rev(n) for n in common]
1049 heads = [self.rev(n) for n in heads]
1045 heads = [self.rev(n) for n in heads]
1050
1046
1051 # we want the ancestors, but inclusive
1047 # we want the ancestors, but inclusive
1052 class lazyset(object):
1048 class lazyset(object):
1053 def __init__(self, lazyvalues):
1049 def __init__(self, lazyvalues):
1054 self.addedvalues = set()
1050 self.addedvalues = set()
1055 self.lazyvalues = lazyvalues
1051 self.lazyvalues = lazyvalues
1056
1052
1057 def __contains__(self, value):
1053 def __contains__(self, value):
1058 return value in self.addedvalues or value in self.lazyvalues
1054 return value in self.addedvalues or value in self.lazyvalues
1059
1055
1060 def __iter__(self):
1056 def __iter__(self):
1061 added = self.addedvalues
1057 added = self.addedvalues
1062 for r in added:
1058 for r in added:
1063 yield r
1059 yield r
1064 for r in self.lazyvalues:
1060 for r in self.lazyvalues:
1065 if not r in added:
1061 if not r in added:
1066 yield r
1062 yield r
1067
1063
1068 def add(self, value):
1064 def add(self, value):
1069 self.addedvalues.add(value)
1065 self.addedvalues.add(value)
1070
1066
1071 def update(self, values):
1067 def update(self, values):
1072 self.addedvalues.update(values)
1068 self.addedvalues.update(values)
1073
1069
1074 has = lazyset(self.ancestors(common))
1070 has = lazyset(self.ancestors(common))
1075 has.add(nullrev)
1071 has.add(nullrev)
1076 has.update(common)
1072 has.update(common)
1077
1073
1078 # take all ancestors from heads that aren't in has
1074 # take all ancestors from heads that aren't in has
1079 missing = set()
1075 missing = set()
1080 visit = collections.deque(r for r in heads if r not in has)
1076 visit = collections.deque(r for r in heads if r not in has)
1081 while visit:
1077 while visit:
1082 r = visit.popleft()
1078 r = visit.popleft()
1083 if r in missing:
1079 if r in missing:
1084 continue
1080 continue
1085 else:
1081 else:
1086 missing.add(r)
1082 missing.add(r)
1087 for p in self.parentrevs(r):
1083 for p in self.parentrevs(r):
1088 if p not in has:
1084 if p not in has:
1089 visit.append(p)
1085 visit.append(p)
1090 missing = list(missing)
1086 missing = list(missing)
1091 missing.sort()
1087 missing.sort()
1092 return has, [self.node(miss) for miss in missing]
1088 return has, [self.node(miss) for miss in missing]
1093
1089
1094 def incrementalmissingrevs(self, common=None):
1090 def incrementalmissingrevs(self, common=None):
1095 """Return an object that can be used to incrementally compute the
1091 """Return an object that can be used to incrementally compute the
1096 revision numbers of the ancestors of arbitrary sets that are not
1092 revision numbers of the ancestors of arbitrary sets that are not
1097 ancestors of common. This is an ancestor.incrementalmissingancestors
1093 ancestors of common. This is an ancestor.incrementalmissingancestors
1098 object.
1094 object.
1099
1095
1100 'common' is a list of revision numbers. If common is not supplied, uses
1096 'common' is a list of revision numbers. If common is not supplied, uses
1101 nullrev.
1097 nullrev.
1102 """
1098 """
1103 if common is None:
1099 if common is None:
1104 common = [nullrev]
1100 common = [nullrev]
1105
1101
1106 if rustancestor is not None:
1102 if rustancestor is not None:
1107 return rustancestor.MissingAncestors(self.index, common)
1103 return rustancestor.MissingAncestors(self.index, common)
1108 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1104 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1109
1105
1110 def findmissingrevs(self, common=None, heads=None):
1106 def findmissingrevs(self, common=None, heads=None):
1111 """Return the revision numbers of the ancestors of heads that
1107 """Return the revision numbers of the ancestors of heads that
1112 are not ancestors of common.
1108 are not ancestors of common.
1113
1109
1114 More specifically, return a list of revision numbers corresponding to
1110 More specifically, return a list of revision numbers corresponding to
1115 nodes N such that every N satisfies the following constraints:
1111 nodes N such that every N satisfies the following constraints:
1116
1112
1117 1. N is an ancestor of some node in 'heads'
1113 1. N is an ancestor of some node in 'heads'
1118 2. N is not an ancestor of any node in 'common'
1114 2. N is not an ancestor of any node in 'common'
1119
1115
1120 The list is sorted by revision number, meaning it is
1116 The list is sorted by revision number, meaning it is
1121 topologically sorted.
1117 topologically sorted.
1122
1118
1123 'heads' and 'common' are both lists of revision numbers. If heads is
1119 'heads' and 'common' are both lists of revision numbers. If heads is
1124 not supplied, uses all of the revlog's heads. If common is not
1120 not supplied, uses all of the revlog's heads. If common is not
1125 supplied, uses nullid."""
1121 supplied, uses nullid."""
1126 if common is None:
1122 if common is None:
1127 common = [nullrev]
1123 common = [nullrev]
1128 if heads is None:
1124 if heads is None:
1129 heads = self.headrevs()
1125 heads = self.headrevs()
1130
1126
1131 inc = self.incrementalmissingrevs(common=common)
1127 inc = self.incrementalmissingrevs(common=common)
1132 return inc.missingancestors(heads)
1128 return inc.missingancestors(heads)
1133
1129
1134 def findmissing(self, common=None, heads=None):
1130 def findmissing(self, common=None, heads=None):
1135 """Return the ancestors of heads that are not ancestors of common.
1131 """Return the ancestors of heads that are not ancestors of common.
1136
1132
1137 More specifically, return a list of nodes N such that every N
1133 More specifically, return a list of nodes N such that every N
1138 satisfies the following constraints:
1134 satisfies the following constraints:
1139
1135
1140 1. N is an ancestor of some node in 'heads'
1136 1. N is an ancestor of some node in 'heads'
1141 2. N is not an ancestor of any node in 'common'
1137 2. N is not an ancestor of any node in 'common'
1142
1138
1143 The list is sorted by revision number, meaning it is
1139 The list is sorted by revision number, meaning it is
1144 topologically sorted.
1140 topologically sorted.
1145
1141
1146 'heads' and 'common' are both lists of node IDs. If heads is
1142 'heads' and 'common' are both lists of node IDs. If heads is
1147 not supplied, uses all of the revlog's heads. If common is not
1143 not supplied, uses all of the revlog's heads. If common is not
1148 supplied, uses nullid."""
1144 supplied, uses nullid."""
1149 if common is None:
1145 if common is None:
1150 common = [nullid]
1146 common = [nullid]
1151 if heads is None:
1147 if heads is None:
1152 heads = self.heads()
1148 heads = self.heads()
1153
1149
1154 common = [self.rev(n) for n in common]
1150 common = [self.rev(n) for n in common]
1155 heads = [self.rev(n) for n in heads]
1151 heads = [self.rev(n) for n in heads]
1156
1152
1157 inc = self.incrementalmissingrevs(common=common)
1153 inc = self.incrementalmissingrevs(common=common)
1158 return [self.node(r) for r in inc.missingancestors(heads)]
1154 return [self.node(r) for r in inc.missingancestors(heads)]
1159
1155
1160 def nodesbetween(self, roots=None, heads=None):
1156 def nodesbetween(self, roots=None, heads=None):
1161 """Return a topological path from 'roots' to 'heads'.
1157 """Return a topological path from 'roots' to 'heads'.
1162
1158
1163 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1159 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1164 topologically sorted list of all nodes N that satisfy both of
1160 topologically sorted list of all nodes N that satisfy both of
1165 these constraints:
1161 these constraints:
1166
1162
1167 1. N is a descendant of some node in 'roots'
1163 1. N is a descendant of some node in 'roots'
1168 2. N is an ancestor of some node in 'heads'
1164 2. N is an ancestor of some node in 'heads'
1169
1165
1170 Every node is considered to be both a descendant and an ancestor
1166 Every node is considered to be both a descendant and an ancestor
1171 of itself, so every reachable node in 'roots' and 'heads' will be
1167 of itself, so every reachable node in 'roots' and 'heads' will be
1172 included in 'nodes'.
1168 included in 'nodes'.
1173
1169
1174 'outroots' is the list of reachable nodes in 'roots', i.e., the
1170 'outroots' is the list of reachable nodes in 'roots', i.e., the
1175 subset of 'roots' that is returned in 'nodes'. Likewise,
1171 subset of 'roots' that is returned in 'nodes'. Likewise,
1176 'outheads' is the subset of 'heads' that is also in 'nodes'.
1172 'outheads' is the subset of 'heads' that is also in 'nodes'.
1177
1173
1178 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1174 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1179 unspecified, uses nullid as the only root. If 'heads' is
1175 unspecified, uses nullid as the only root. If 'heads' is
1180 unspecified, uses list of all of the revlog's heads."""
1176 unspecified, uses list of all of the revlog's heads."""
1181 nonodes = ([], [], [])
1177 nonodes = ([], [], [])
1182 if roots is not None:
1178 if roots is not None:
1183 roots = list(roots)
1179 roots = list(roots)
1184 if not roots:
1180 if not roots:
1185 return nonodes
1181 return nonodes
1186 lowestrev = min([self.rev(n) for n in roots])
1182 lowestrev = min([self.rev(n) for n in roots])
1187 else:
1183 else:
1188 roots = [nullid] # Everybody's a descendant of nullid
1184 roots = [nullid] # Everybody's a descendant of nullid
1189 lowestrev = nullrev
1185 lowestrev = nullrev
1190 if (lowestrev == nullrev) and (heads is None):
1186 if (lowestrev == nullrev) and (heads is None):
1191 # We want _all_ the nodes!
1187 # We want _all_ the nodes!
1192 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1188 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1193 if heads is None:
1189 if heads is None:
1194 # All nodes are ancestors, so the latest ancestor is the last
1190 # All nodes are ancestors, so the latest ancestor is the last
1195 # node.
1191 # node.
1196 highestrev = len(self) - 1
1192 highestrev = len(self) - 1
1197 # Set ancestors to None to signal that every node is an ancestor.
1193 # Set ancestors to None to signal that every node is an ancestor.
1198 ancestors = None
1194 ancestors = None
1199 # Set heads to an empty dictionary for later discovery of heads
1195 # Set heads to an empty dictionary for later discovery of heads
1200 heads = {}
1196 heads = {}
1201 else:
1197 else:
1202 heads = list(heads)
1198 heads = list(heads)
1203 if not heads:
1199 if not heads:
1204 return nonodes
1200 return nonodes
1205 ancestors = set()
1201 ancestors = set()
1206 # Turn heads into a dictionary so we can remove 'fake' heads.
1202 # Turn heads into a dictionary so we can remove 'fake' heads.
1207 # Also, later we will be using it to filter out the heads we can't
1203 # Also, later we will be using it to filter out the heads we can't
1208 # find from roots.
1204 # find from roots.
1209 heads = dict.fromkeys(heads, False)
1205 heads = dict.fromkeys(heads, False)
1210 # Start at the top and keep marking parents until we're done.
1206 # Start at the top and keep marking parents until we're done.
1211 nodestotag = set(heads)
1207 nodestotag = set(heads)
1212 # Remember where the top was so we can use it as a limit later.
1208 # Remember where the top was so we can use it as a limit later.
1213 highestrev = max([self.rev(n) for n in nodestotag])
1209 highestrev = max([self.rev(n) for n in nodestotag])
1214 while nodestotag:
1210 while nodestotag:
1215 # grab a node to tag
1211 # grab a node to tag
1216 n = nodestotag.pop()
1212 n = nodestotag.pop()
1217 # Never tag nullid
1213 # Never tag nullid
1218 if n == nullid:
1214 if n == nullid:
1219 continue
1215 continue
1220 # A node's revision number represents its place in a
1216 # A node's revision number represents its place in a
1221 # topologically sorted list of nodes.
1217 # topologically sorted list of nodes.
1222 r = self.rev(n)
1218 r = self.rev(n)
1223 if r >= lowestrev:
1219 if r >= lowestrev:
1224 if n not in ancestors:
1220 if n not in ancestors:
1225 # If we are possibly a descendant of one of the roots
1221 # If we are possibly a descendant of one of the roots
1226 # and we haven't already been marked as an ancestor
1222 # and we haven't already been marked as an ancestor
1227 ancestors.add(n) # Mark as ancestor
1223 ancestors.add(n) # Mark as ancestor
1228 # Add non-nullid parents to list of nodes to tag.
1224 # Add non-nullid parents to list of nodes to tag.
1229 nodestotag.update(
1225 nodestotag.update(
1230 [p for p in self.parents(n) if p != nullid]
1226 [p for p in self.parents(n) if p != nullid]
1231 )
1227 )
1232 elif n in heads: # We've seen it before, is it a fake head?
1228 elif n in heads: # We've seen it before, is it a fake head?
1233 # So it is, real heads should not be the ancestors of
1229 # So it is, real heads should not be the ancestors of
1234 # any other heads.
1230 # any other heads.
1235 heads.pop(n)
1231 heads.pop(n)
1236 if not ancestors:
1232 if not ancestors:
1237 return nonodes
1233 return nonodes
1238 # Now that we have our set of ancestors, we want to remove any
1234 # Now that we have our set of ancestors, we want to remove any
1239 # roots that are not ancestors.
1235 # roots that are not ancestors.
1240
1236
1241 # If one of the roots was nullid, everything is included anyway.
1237 # If one of the roots was nullid, everything is included anyway.
1242 if lowestrev > nullrev:
1238 if lowestrev > nullrev:
1243 # But, since we weren't, let's recompute the lowest rev to not
1239 # But, since we weren't, let's recompute the lowest rev to not
1244 # include roots that aren't ancestors.
1240 # include roots that aren't ancestors.
1245
1241
1246 # Filter out roots that aren't ancestors of heads
1242 # Filter out roots that aren't ancestors of heads
1247 roots = [root for root in roots if root in ancestors]
1243 roots = [root for root in roots if root in ancestors]
1248 # Recompute the lowest revision
1244 # Recompute the lowest revision
1249 if roots:
1245 if roots:
1250 lowestrev = min([self.rev(root) for root in roots])
1246 lowestrev = min([self.rev(root) for root in roots])
1251 else:
1247 else:
1252 # No more roots? Return empty list
1248 # No more roots? Return empty list
1253 return nonodes
1249 return nonodes
1254 else:
1250 else:
1255 # We are descending from nullid, and don't need to care about
1251 # We are descending from nullid, and don't need to care about
1256 # any other roots.
1252 # any other roots.
1257 lowestrev = nullrev
1253 lowestrev = nullrev
1258 roots = [nullid]
1254 roots = [nullid]
1259 # Transform our roots list into a set.
1255 # Transform our roots list into a set.
1260 descendants = set(roots)
1256 descendants = set(roots)
1261 # Also, keep the original roots so we can filter out roots that aren't
1257 # Also, keep the original roots so we can filter out roots that aren't
1262 # 'real' roots (i.e. are descended from other roots).
1258 # 'real' roots (i.e. are descended from other roots).
1263 roots = descendants.copy()
1259 roots = descendants.copy()
1264 # Our topologically sorted list of output nodes.
1260 # Our topologically sorted list of output nodes.
1265 orderedout = []
1261 orderedout = []
1266 # Don't start at nullid since we don't want nullid in our output list,
1262 # Don't start at nullid since we don't want nullid in our output list,
1267 # and if nullid shows up in descendants, empty parents will look like
1263 # and if nullid shows up in descendants, empty parents will look like
1268 # they're descendants.
1264 # they're descendants.
1269 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1265 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1270 n = self.node(r)
1266 n = self.node(r)
1271 isdescendant = False
1267 isdescendant = False
1272 if lowestrev == nullrev: # Everybody is a descendant of nullid
1268 if lowestrev == nullrev: # Everybody is a descendant of nullid
1273 isdescendant = True
1269 isdescendant = True
1274 elif n in descendants:
1270 elif n in descendants:
1275 # n is already a descendant
1271 # n is already a descendant
1276 isdescendant = True
1272 isdescendant = True
1277 # This check only needs to be done here because all the roots
1273 # This check only needs to be done here because all the roots
1278 # will start being marked is descendants before the loop.
1274 # will start being marked is descendants before the loop.
1279 if n in roots:
1275 if n in roots:
1280 # If n was a root, check if it's a 'real' root.
1276 # If n was a root, check if it's a 'real' root.
1281 p = tuple(self.parents(n))
1277 p = tuple(self.parents(n))
1282 # If any of its parents are descendants, it's not a root.
1278 # If any of its parents are descendants, it's not a root.
1283 if (p[0] in descendants) or (p[1] in descendants):
1279 if (p[0] in descendants) or (p[1] in descendants):
1284 roots.remove(n)
1280 roots.remove(n)
1285 else:
1281 else:
1286 p = tuple(self.parents(n))
1282 p = tuple(self.parents(n))
1287 # A node is a descendant if either of its parents are
1283 # A node is a descendant if either of its parents are
1288 # descendants. (We seeded the dependents list with the roots
1284 # descendants. (We seeded the dependents list with the roots
1289 # up there, remember?)
1285 # up there, remember?)
1290 if (p[0] in descendants) or (p[1] in descendants):
1286 if (p[0] in descendants) or (p[1] in descendants):
1291 descendants.add(n)
1287 descendants.add(n)
1292 isdescendant = True
1288 isdescendant = True
1293 if isdescendant and ((ancestors is None) or (n in ancestors)):
1289 if isdescendant and ((ancestors is None) or (n in ancestors)):
1294 # Only include nodes that are both descendants and ancestors.
1290 # Only include nodes that are both descendants and ancestors.
1295 orderedout.append(n)
1291 orderedout.append(n)
1296 if (ancestors is not None) and (n in heads):
1292 if (ancestors is not None) and (n in heads):
1297 # We're trying to figure out which heads are reachable
1293 # We're trying to figure out which heads are reachable
1298 # from roots.
1294 # from roots.
1299 # Mark this head as having been reached
1295 # Mark this head as having been reached
1300 heads[n] = True
1296 heads[n] = True
1301 elif ancestors is None:
1297 elif ancestors is None:
1302 # Otherwise, we're trying to discover the heads.
1298 # Otherwise, we're trying to discover the heads.
1303 # Assume this is a head because if it isn't, the next step
1299 # Assume this is a head because if it isn't, the next step
1304 # will eventually remove it.
1300 # will eventually remove it.
1305 heads[n] = True
1301 heads[n] = True
1306 # But, obviously its parents aren't.
1302 # But, obviously its parents aren't.
1307 for p in self.parents(n):
1303 for p in self.parents(n):
1308 heads.pop(p, None)
1304 heads.pop(p, None)
1309 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1305 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1310 roots = list(roots)
1306 roots = list(roots)
1311 assert orderedout
1307 assert orderedout
1312 assert roots
1308 assert roots
1313 assert heads
1309 assert heads
1314 return (orderedout, roots, heads)
1310 return (orderedout, roots, heads)
1315
1311
1316 def headrevs(self, revs=None):
1312 def headrevs(self, revs=None):
1317 if revs is None:
1313 if revs is None:
1318 try:
1314 try:
1319 return self.index.headrevs()
1315 return self.index.headrevs()
1320 except AttributeError:
1316 except AttributeError:
1321 return self._headrevs()
1317 return self._headrevs()
1322 if rustdagop is not None:
1318 if rustdagop is not None:
1323 return rustdagop.headrevs(self.index, revs)
1319 return rustdagop.headrevs(self.index, revs)
1324 return dagop.headrevs(revs, self._uncheckedparentrevs)
1320 return dagop.headrevs(revs, self._uncheckedparentrevs)
1325
1321
1326 def computephases(self, roots):
1322 def computephases(self, roots):
1327 return self.index.computephasesmapsets(roots)
1323 return self.index.computephasesmapsets(roots)
1328
1324
1329 def _headrevs(self):
1325 def _headrevs(self):
1330 count = len(self)
1326 count = len(self)
1331 if not count:
1327 if not count:
1332 return [nullrev]
1328 return [nullrev]
1333 # we won't iter over filtered rev so nobody is a head at start
1329 # we won't iter over filtered rev so nobody is a head at start
1334 ishead = [0] * (count + 1)
1330 ishead = [0] * (count + 1)
1335 index = self.index
1331 index = self.index
1336 for r in self:
1332 for r in self:
1337 ishead[r] = 1 # I may be an head
1333 ishead[r] = 1 # I may be an head
1338 e = index[r]
1334 e = index[r]
1339 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1335 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1340 return [r for r, val in enumerate(ishead) if val]
1336 return [r for r, val in enumerate(ishead) if val]
1341
1337
1342 def heads(self, start=None, stop=None):
1338 def heads(self, start=None, stop=None):
1343 """return the list of all nodes that have no children
1339 """return the list of all nodes that have no children
1344
1340
1345 if start is specified, only heads that are descendants of
1341 if start is specified, only heads that are descendants of
1346 start will be returned
1342 start will be returned
1347 if stop is specified, it will consider all the revs from stop
1343 if stop is specified, it will consider all the revs from stop
1348 as if they had no children
1344 as if they had no children
1349 """
1345 """
1350 if start is None and stop is None:
1346 if start is None and stop is None:
1351 if not len(self):
1347 if not len(self):
1352 return [nullid]
1348 return [nullid]
1353 return [self.node(r) for r in self.headrevs()]
1349 return [self.node(r) for r in self.headrevs()]
1354
1350
1355 if start is None:
1351 if start is None:
1356 start = nullrev
1352 start = nullrev
1357 else:
1353 else:
1358 start = self.rev(start)
1354 start = self.rev(start)
1359
1355
1360 stoprevs = {self.rev(n) for n in stop or []}
1356 stoprevs = {self.rev(n) for n in stop or []}
1361
1357
1362 revs = dagop.headrevssubset(
1358 revs = dagop.headrevssubset(
1363 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1359 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1364 )
1360 )
1365
1361
1366 return [self.node(rev) for rev in revs]
1362 return [self.node(rev) for rev in revs]
1367
1363
1368 def children(self, node):
1364 def children(self, node):
1369 """find the children of a given node"""
1365 """find the children of a given node"""
1370 c = []
1366 c = []
1371 p = self.rev(node)
1367 p = self.rev(node)
1372 for r in self.revs(start=p + 1):
1368 for r in self.revs(start=p + 1):
1373 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1369 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1374 if prevs:
1370 if prevs:
1375 for pr in prevs:
1371 for pr in prevs:
1376 if pr == p:
1372 if pr == p:
1377 c.append(self.node(r))
1373 c.append(self.node(r))
1378 elif p == nullrev:
1374 elif p == nullrev:
1379 c.append(self.node(r))
1375 c.append(self.node(r))
1380 return c
1376 return c
1381
1377
1382 def commonancestorsheads(self, a, b):
1378 def commonancestorsheads(self, a, b):
1383 """calculate all the heads of the common ancestors of nodes a and b"""
1379 """calculate all the heads of the common ancestors of nodes a and b"""
1384 a, b = self.rev(a), self.rev(b)
1380 a, b = self.rev(a), self.rev(b)
1385 ancs = self._commonancestorsheads(a, b)
1381 ancs = self._commonancestorsheads(a, b)
1386 return pycompat.maplist(self.node, ancs)
1382 return pycompat.maplist(self.node, ancs)
1387
1383
1388 def _commonancestorsheads(self, *revs):
1384 def _commonancestorsheads(self, *revs):
1389 """calculate all the heads of the common ancestors of revs"""
1385 """calculate all the heads of the common ancestors of revs"""
1390 try:
1386 try:
1391 ancs = self.index.commonancestorsheads(*revs)
1387 ancs = self.index.commonancestorsheads(*revs)
1392 except (AttributeError, OverflowError): # C implementation failed
1388 except (AttributeError, OverflowError): # C implementation failed
1393 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1389 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1394 return ancs
1390 return ancs
1395
1391
1396 def isancestor(self, a, b):
1392 def isancestor(self, a, b):
1397 """return True if node a is an ancestor of node b
1393 """return True if node a is an ancestor of node b
1398
1394
1399 A revision is considered an ancestor of itself."""
1395 A revision is considered an ancestor of itself."""
1400 a, b = self.rev(a), self.rev(b)
1396 a, b = self.rev(a), self.rev(b)
1401 return self.isancestorrev(a, b)
1397 return self.isancestorrev(a, b)
1402
1398
1403 def isancestorrev(self, a, b):
1399 def isancestorrev(self, a, b):
1404 """return True if revision a is an ancestor of revision b
1400 """return True if revision a is an ancestor of revision b
1405
1401
1406 A revision is considered an ancestor of itself.
1402 A revision is considered an ancestor of itself.
1407
1403
1408 The implementation of this is trivial but the use of
1404 The implementation of this is trivial but the use of
1409 reachableroots is not."""
1405 reachableroots is not."""
1410 if a == nullrev:
1406 if a == nullrev:
1411 return True
1407 return True
1412 elif a == b:
1408 elif a == b:
1413 return True
1409 return True
1414 elif a > b:
1410 elif a > b:
1415 return False
1411 return False
1416 return bool(self.reachableroots(a, [b], [a], includepath=False))
1412 return bool(self.reachableroots(a, [b], [a], includepath=False))
1417
1413
1418 def reachableroots(self, minroot, heads, roots, includepath=False):
1414 def reachableroots(self, minroot, heads, roots, includepath=False):
1419 """return (heads(::(<roots> and <roots>::<heads>)))
1415 """return (heads(::(<roots> and <roots>::<heads>)))
1420
1416
1421 If includepath is True, return (<roots>::<heads>)."""
1417 If includepath is True, return (<roots>::<heads>)."""
1422 try:
1418 try:
1423 return self.index.reachableroots2(
1419 return self.index.reachableroots2(
1424 minroot, heads, roots, includepath
1420 minroot, heads, roots, includepath
1425 )
1421 )
1426 except AttributeError:
1422 except AttributeError:
1427 return dagop._reachablerootspure(
1423 return dagop._reachablerootspure(
1428 self.parentrevs, minroot, roots, heads, includepath
1424 self.parentrevs, minroot, roots, heads, includepath
1429 )
1425 )
1430
1426
1431 def ancestor(self, a, b):
1427 def ancestor(self, a, b):
1432 """calculate the "best" common ancestor of nodes a and b"""
1428 """calculate the "best" common ancestor of nodes a and b"""
1433
1429
1434 a, b = self.rev(a), self.rev(b)
1430 a, b = self.rev(a), self.rev(b)
1435 try:
1431 try:
1436 ancs = self.index.ancestors(a, b)
1432 ancs = self.index.ancestors(a, b)
1437 except (AttributeError, OverflowError):
1433 except (AttributeError, OverflowError):
1438 ancs = ancestor.ancestors(self.parentrevs, a, b)
1434 ancs = ancestor.ancestors(self.parentrevs, a, b)
1439 if ancs:
1435 if ancs:
1440 # choose a consistent winner when there's a tie
1436 # choose a consistent winner when there's a tie
1441 return min(map(self.node, ancs))
1437 return min(map(self.node, ancs))
1442 return nullid
1438 return nullid
1443
1439
1444 def _match(self, id):
1440 def _match(self, id):
1445 if isinstance(id, int):
1441 if isinstance(id, int):
1446 # rev
1442 # rev
1447 return self.node(id)
1443 return self.node(id)
1448 if len(id) == 20:
1444 if len(id) == 20:
1449 # possibly a binary node
1445 # possibly a binary node
1450 # odds of a binary node being all hex in ASCII are 1 in 10**25
1446 # odds of a binary node being all hex in ASCII are 1 in 10**25
1451 try:
1447 try:
1452 node = id
1448 node = id
1453 self.rev(node) # quick search the index
1449 self.rev(node) # quick search the index
1454 return node
1450 return node
1455 except error.LookupError:
1451 except error.LookupError:
1456 pass # may be partial hex id
1452 pass # may be partial hex id
1457 try:
1453 try:
1458 # str(rev)
1454 # str(rev)
1459 rev = int(id)
1455 rev = int(id)
1460 if b"%d" % rev != id:
1456 if b"%d" % rev != id:
1461 raise ValueError
1457 raise ValueError
1462 if rev < 0:
1458 if rev < 0:
1463 rev = len(self) + rev
1459 rev = len(self) + rev
1464 if rev < 0 or rev >= len(self):
1460 if rev < 0 or rev >= len(self):
1465 raise ValueError
1461 raise ValueError
1466 return self.node(rev)
1462 return self.node(rev)
1467 except (ValueError, OverflowError):
1463 except (ValueError, OverflowError):
1468 pass
1464 pass
1469 if len(id) == 40:
1465 if len(id) == 40:
1470 try:
1466 try:
1471 # a full hex nodeid?
1467 # a full hex nodeid?
1472 node = bin(id)
1468 node = bin(id)
1473 self.rev(node)
1469 self.rev(node)
1474 return node
1470 return node
1475 except (TypeError, error.LookupError):
1471 except (TypeError, error.LookupError):
1476 pass
1472 pass
1477
1473
1478 def _partialmatch(self, id):
1474 def _partialmatch(self, id):
1479 # we don't care wdirfilenodeids as they should be always full hash
1475 # we don't care wdirfilenodeids as they should be always full hash
1480 maybewdir = wdirhex.startswith(id)
1476 maybewdir = wdirhex.startswith(id)
1481 try:
1477 try:
1482 partial = self.index.partialmatch(id)
1478 partial = self.index.partialmatch(id)
1483 if partial and self.hasnode(partial):
1479 if partial and self.hasnode(partial):
1484 if maybewdir:
1480 if maybewdir:
1485 # single 'ff...' match in radix tree, ambiguous with wdir
1481 # single 'ff...' match in radix tree, ambiguous with wdir
1486 raise error.RevlogError
1482 raise error.RevlogError
1487 return partial
1483 return partial
1488 if maybewdir:
1484 if maybewdir:
1489 # no 'ff...' match in radix tree, wdir identified
1485 # no 'ff...' match in radix tree, wdir identified
1490 raise error.WdirUnsupported
1486 raise error.WdirUnsupported
1491 return None
1487 return None
1492 except error.RevlogError:
1488 except error.RevlogError:
1493 # parsers.c radix tree lookup gave multiple matches
1489 # parsers.c radix tree lookup gave multiple matches
1494 # fast path: for unfiltered changelog, radix tree is accurate
1490 # fast path: for unfiltered changelog, radix tree is accurate
1495 if not getattr(self, 'filteredrevs', None):
1491 if not getattr(self, 'filteredrevs', None):
1496 raise error.AmbiguousPrefixLookupError(
1492 raise error.AmbiguousPrefixLookupError(
1497 id, self.indexfile, _(b'ambiguous identifier')
1493 id, self.indexfile, _(b'ambiguous identifier')
1498 )
1494 )
1499 # fall through to slow path that filters hidden revisions
1495 # fall through to slow path that filters hidden revisions
1500 except (AttributeError, ValueError):
1496 except (AttributeError, ValueError):
1501 # we are pure python, or key was too short to search radix tree
1497 # we are pure python, or key was too short to search radix tree
1502 pass
1498 pass
1503
1499
1504 if id in self._pcache:
1500 if id in self._pcache:
1505 return self._pcache[id]
1501 return self._pcache[id]
1506
1502
1507 if len(id) <= 40:
1503 if len(id) <= 40:
1508 try:
1504 try:
1509 # hex(node)[:...]
1505 # hex(node)[:...]
1510 l = len(id) // 2 # grab an even number of digits
1506 l = len(id) // 2 # grab an even number of digits
1511 prefix = bin(id[: l * 2])
1507 prefix = bin(id[: l * 2])
1512 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1508 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1513 nl = [
1509 nl = [
1514 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1510 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1515 ]
1511 ]
1516 if nullhex.startswith(id):
1512 if nullhex.startswith(id):
1517 nl.append(nullid)
1513 nl.append(nullid)
1518 if len(nl) > 0:
1514 if len(nl) > 0:
1519 if len(nl) == 1 and not maybewdir:
1515 if len(nl) == 1 and not maybewdir:
1520 self._pcache[id] = nl[0]
1516 self._pcache[id] = nl[0]
1521 return nl[0]
1517 return nl[0]
1522 raise error.AmbiguousPrefixLookupError(
1518 raise error.AmbiguousPrefixLookupError(
1523 id, self.indexfile, _(b'ambiguous identifier')
1519 id, self.indexfile, _(b'ambiguous identifier')
1524 )
1520 )
1525 if maybewdir:
1521 if maybewdir:
1526 raise error.WdirUnsupported
1522 raise error.WdirUnsupported
1527 return None
1523 return None
1528 except TypeError:
1524 except TypeError:
1529 pass
1525 pass
1530
1526
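The slow path above resolves a hex prefix by scanning every index entry whose node starts with the decoded prefix and rejecting ambiguous results. A minimal standalone sketch of that idea, using a hypothetical resolve_prefix helper over plain hex ids (not part of the revlog API):

def resolve_prefix(prefix, hexnodes):
    # return the single hex node starting with `prefix`; None when nothing
    # matches; raise when the prefix is ambiguous (mirrors the slow path)
    matches = [n for n in hexnodes if n.startswith(prefix)]
    if not matches:
        return None
    if len(matches) > 1:
        raise LookupError('ambiguous identifier: ' + prefix)
    return matches[0]

nodes = ['1f0e4c2a' * 5, '1f9b3d7c' * 5]  # stand-ins for 40-char hex ids
assert resolve_prefix('1f0', nodes) == nodes[0]
assert resolve_prefix('ab', nodes) is None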
1531 def lookup(self, id):
1527 def lookup(self, id):
1532 """locate a node based on:
1528 """locate a node based on:
1533 - revision number or str(revision number)
1529 - revision number or str(revision number)
1534 - nodeid or subset of hex nodeid
1530 - nodeid or subset of hex nodeid
1535 """
1531 """
1536 n = self._match(id)
1532 n = self._match(id)
1537 if n is not None:
1533 if n is not None:
1538 return n
1534 return n
1539 n = self._partialmatch(id)
1535 n = self._partialmatch(id)
1540 if n:
1536 if n:
1541 return n
1537 return n
1542
1538
1543 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1539 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1544
1540
1545 def shortest(self, node, minlength=1):
1541 def shortest(self, node, minlength=1):
1546 """Find the shortest unambiguous prefix that matches node."""
1542 """Find the shortest unambiguous prefix that matches node."""
1547
1543
1548 def isvalid(prefix):
1544 def isvalid(prefix):
1549 try:
1545 try:
1550 matchednode = self._partialmatch(prefix)
1546 matchednode = self._partialmatch(prefix)
1551 except error.AmbiguousPrefixLookupError:
1547 except error.AmbiguousPrefixLookupError:
1552 return False
1548 return False
1553 except error.WdirUnsupported:
1549 except error.WdirUnsupported:
1554 # single 'ff...' match
1550 # single 'ff...' match
1555 return True
1551 return True
1556 if matchednode is None:
1552 if matchednode is None:
1557 raise error.LookupError(node, self.indexfile, _(b'no node'))
1553 raise error.LookupError(node, self.indexfile, _(b'no node'))
1558 return True
1554 return True
1559
1555
1560 def maybewdir(prefix):
1556 def maybewdir(prefix):
1561 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1557 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1562
1558
1563 hexnode = hex(node)
1559 hexnode = hex(node)
1564
1560
1565 def disambiguate(hexnode, minlength):
1561 def disambiguate(hexnode, minlength):
1566 """Disambiguate against wdirid."""
1562 """Disambiguate against wdirid."""
1567 for length in range(minlength, len(hexnode) + 1):
1563 for length in range(minlength, len(hexnode) + 1):
1568 prefix = hexnode[:length]
1564 prefix = hexnode[:length]
1569 if not maybewdir(prefix):
1565 if not maybewdir(prefix):
1570 return prefix
1566 return prefix
1571
1567
1572 if not getattr(self, 'filteredrevs', None):
1568 if not getattr(self, 'filteredrevs', None):
1573 try:
1569 try:
1574 length = max(self.index.shortest(node), minlength)
1570 length = max(self.index.shortest(node), minlength)
1575 return disambiguate(hexnode, length)
1571 return disambiguate(hexnode, length)
1576 except error.RevlogError:
1572 except error.RevlogError:
1577 if node != wdirid:
1573 if node != wdirid:
1578 raise error.LookupError(node, self.indexfile, _(b'no node'))
1574 raise error.LookupError(node, self.indexfile, _(b'no node'))
1579 except AttributeError:
1575 except AttributeError:
1580 # Fall through to pure code
1576 # Fall through to pure code
1581 pass
1577 pass
1582
1578
1583 if node == wdirid:
1579 if node == wdirid:
1584 for length in range(minlength, len(hexnode) + 1):
1580 for length in range(minlength, len(hexnode) + 1):
1585 prefix = hexnode[:length]
1581 prefix = hexnode[:length]
1586 if isvalid(prefix):
1582 if isvalid(prefix):
1587 return prefix
1583 return prefix
1588
1584
1589 for length in range(minlength, len(hexnode) + 1):
1585 for length in range(minlength, len(hexnode) + 1):
1590 prefix = hexnode[:length]
1586 prefix = hexnode[:length]
1591 if isvalid(prefix):
1587 if isvalid(prefix):
1592 return disambiguate(hexnode, length)
1588 return disambiguate(hexnode, length)
1593
1589
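In effect, `shortest` grows the prefix one hex digit at a time until it stops colliding with every other node (and with the all-'f' working-directory id). A standalone sketch of that search over a flat list of hex ids, offered only as an illustration:

def shortest_prefix(target, ids, minlength=1):
    # grow the prefix until no other id shares it (toy version, no wdir check)
    others = [n for n in ids if n != target]
    for length in range(minlength, len(target) + 1):
        prefix = target[:length]
        if not any(n.startswith(prefix) for n in others):
            return prefix
    return target

ids = ['1f0e4c2a' * 5, '1f9b3d7c' * 5, '20aa11bb' * 5]
print(shortest_prefix(ids[0], ids))  # '1f0'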
1594 def cmp(self, node, text):
1590 def cmp(self, node, text):
1595 """compare text with a given file revision
1591 """compare text with a given file revision
1596
1592
1597 returns True if text is different than what is stored.
1593 returns True if text is different than what is stored.
1598 """
1594 """
1599 p1, p2 = self.parents(node)
1595 p1, p2 = self.parents(node)
1600 return storageutil.hashrevisionsha1(text, p1, p2) != node
1596 return storageutil.hashrevisionsha1(text, p1, p2) != node
1601
1597
1602 def _cachesegment(self, offset, data):
1598 def _cachesegment(self, offset, data):
1603 """Add a segment to the revlog cache.
1599 """Add a segment to the revlog cache.
1604
1600
1605 Accepts an absolute offset and the data that is at that location.
1601 Accepts an absolute offset and the data that is at that location.
1606 """
1602 """
1607 o, d = self._chunkcache
1603 o, d = self._chunkcache
1608 # try to add to existing cache
1604 # try to add to existing cache
1609 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1605 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1610 self._chunkcache = o, d + data
1606 self._chunkcache = o, d + data
1611 else:
1607 else:
1612 self._chunkcache = offset, data
1608 self._chunkcache = offset, data
1613
1609
1614 def _readsegment(self, offset, length, df=None):
1610 def _readsegment(self, offset, length, df=None):
1615 """Load a segment of raw data from the revlog.
1611 """Load a segment of raw data from the revlog.
1616
1612
1617 Accepts an absolute offset, length to read, and an optional existing
1613 Accepts an absolute offset, length to read, and an optional existing
1618 file handle to read from.
1614 file handle to read from.
1619
1615
1620 If an existing file handle is passed, it will be seeked and the
1616 If an existing file handle is passed, it will be seeked and the
1621 original seek position will NOT be restored.
1617 original seek position will NOT be restored.
1622
1618
1623 Returns a str or buffer of raw byte data.
1619 Returns a str or buffer of raw byte data.
1624
1620
1625 Raises if the requested number of bytes could not be read.
1621 Raises if the requested number of bytes could not be read.
1626 """
1622 """
1627 # Cache data both forward and backward around the requested
1623 # Cache data both forward and backward around the requested
1628 # data, in a fixed size window. This helps speed up operations
1624 # data, in a fixed size window. This helps speed up operations
1629 # involving reading the revlog backwards.
1625 # involving reading the revlog backwards.
1630 cachesize = self._chunkcachesize
1626 cachesize = self._chunkcachesize
1631 realoffset = offset & ~(cachesize - 1)
1627 realoffset = offset & ~(cachesize - 1)
1632 reallength = (
1628 reallength = (
1633 (offset + length + cachesize) & ~(cachesize - 1)
1629 (offset + length + cachesize) & ~(cachesize - 1)
1634 ) - realoffset
1630 ) - realoffset
1635 with self._datareadfp(df) as df:
1631 with self._datareadfp(df) as df:
1636 df.seek(realoffset)
1632 df.seek(realoffset)
1637 d = df.read(reallength)
1633 d = df.read(reallength)
1638
1634
1639 self._cachesegment(realoffset, d)
1635 self._cachesegment(realoffset, d)
1640 if offset != realoffset or reallength != length:
1636 if offset != realoffset or reallength != length:
1641 startoffset = offset - realoffset
1637 startoffset = offset - realoffset
1642 if len(d) - startoffset < length:
1638 if len(d) - startoffset < length:
1643 raise error.RevlogError(
1639 raise error.RevlogError(
1644 _(
1640 _(
1645 b'partial read of revlog %s; expected %d bytes from '
1641 b'partial read of revlog %s; expected %d bytes from '
1646 b'offset %d, got %d'
1642 b'offset %d, got %d'
1647 )
1643 )
1648 % (
1644 % (
1649 self.indexfile if self._inline else self.datafile,
1645 self.indexfile if self._inline else self.datafile,
1650 length,
1646 length,
1651 realoffset,
1647 realoffset,
1652 len(d) - startoffset,
1648 len(d) - startoffset,
1653 )
1649 )
1654 )
1650 )
1655
1651
1656 return util.buffer(d, startoffset, length)
1652 return util.buffer(d, startoffset, length)
1657
1653
1658 if len(d) < length:
1654 if len(d) < length:
1659 raise error.RevlogError(
1655 raise error.RevlogError(
1660 _(
1656 _(
1661 b'partial read of revlog %s; expected %d bytes from offset '
1657 b'partial read of revlog %s; expected %d bytes from offset '
1662 b'%d, got %d'
1658 b'%d, got %d'
1663 )
1659 )
1664 % (
1660 % (
1665 self.indexfile if self._inline else self.datafile,
1661 self.indexfile if self._inline else self.datafile,
1666 length,
1662 length,
1667 offset,
1663 offset,
1668 len(d),
1664 len(d),
1669 )
1665 )
1670 )
1666 )
1671
1667
1672 return d
1668 return d
1673
1669
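The masking above rounds the read window down to a multiple of the chunk-cache size and extends it past the requested range, so neighbouring reads in either direction can be served from the cache. A quick standalone check of that arithmetic, with arbitrary example values and assuming a power-of-two cache size:

cachesize = 65536                 # assumed power of two, as the bit mask requires
offset, length = 100000, 3000

realoffset = offset & ~(cachesize - 1)                       # round start down
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset

assert realoffset <= offset
assert realoffset + reallength >= offset + length
print(realoffset, reallength)     # 65536 65536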
1674 def _getsegment(self, offset, length, df=None):
1670 def _getsegment(self, offset, length, df=None):
1675 """Obtain a segment of raw data from the revlog.
1671 """Obtain a segment of raw data from the revlog.
1676
1672
1677 Accepts an absolute offset, length of bytes to obtain, and an
1673 Accepts an absolute offset, length of bytes to obtain, and an
1678 optional file handle to the already-opened revlog. If the file
1674 optional file handle to the already-opened revlog. If the file
1679 handle is used, its original seek position will not be preserved.
1675 handle is used, its original seek position will not be preserved.
1680
1676
1681 Requests for data may be returned from a cache.
1677 Requests for data may be returned from a cache.
1682
1678
1683 Returns a str or a buffer instance of raw byte data.
1679 Returns a str or a buffer instance of raw byte data.
1684 """
1680 """
1685 o, d = self._chunkcache
1681 o, d = self._chunkcache
1686 l = len(d)
1682 l = len(d)
1687
1683
1688 # is it in the cache?
1684 # is it in the cache?
1689 cachestart = offset - o
1685 cachestart = offset - o
1690 cacheend = cachestart + length
1686 cacheend = cachestart + length
1691 if cachestart >= 0 and cacheend <= l:
1687 if cachestart >= 0 and cacheend <= l:
1692 if cachestart == 0 and cacheend == l:
1688 if cachestart == 0 and cacheend == l:
1693 return d # avoid a copy
1689 return d # avoid a copy
1694 return util.buffer(d, cachestart, cacheend - cachestart)
1690 return util.buffer(d, cachestart, cacheend - cachestart)
1695
1691
1696 return self._readsegment(offset, length, df=df)
1692 return self._readsegment(offset, length, df=df)
1697
1693
1698 def _getsegmentforrevs(self, startrev, endrev, df=None):
1694 def _getsegmentforrevs(self, startrev, endrev, df=None):
1699 """Obtain a segment of raw data corresponding to a range of revisions.
1695 """Obtain a segment of raw data corresponding to a range of revisions.
1700
1696
1701 Accepts the start and end revisions and an optional already-open
1697 Accepts the start and end revisions and an optional already-open
1702 file handle to be used for reading. If the file handle is used, its
1698 file handle to be used for reading. If the file handle is used, its
1703 seek position will not be preserved.
1699 seek position will not be preserved.
1704
1700
1705 Requests for data may be satisfied by a cache.
1701 Requests for data may be satisfied by a cache.
1706
1702
1707 Returns a 2-tuple of (offset, data) for the requested range of
1703 Returns a 2-tuple of (offset, data) for the requested range of
1708 revisions. Offset is the integer offset from the beginning of the
1704 revisions. Offset is the integer offset from the beginning of the
1709 revlog and data is a str or buffer of the raw byte data.
1705 revlog and data is a str or buffer of the raw byte data.
1710
1706
1711 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1707 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1712 to determine where each revision's data begins and ends.
1708 to determine where each revision's data begins and ends.
1713 """
1709 """
1714 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1710 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1715 # (functions are expensive).
1711 # (functions are expensive).
1716 index = self.index
1712 index = self.index
1717 istart = index[startrev]
1713 istart = index[startrev]
1718 start = int(istart[0] >> 16)
1714 start = int(istart[0] >> 16)
1719 if startrev == endrev:
1715 if startrev == endrev:
1720 end = start + istart[1]
1716 end = start + istart[1]
1721 else:
1717 else:
1722 iend = index[endrev]
1718 iend = index[endrev]
1723 end = int(iend[0] >> 16) + iend[1]
1719 end = int(iend[0] >> 16) + iend[1]
1724
1720
1725 if self._inline:
1721 if self._inline:
1726 start += (startrev + 1) * self._io.size
1722 start += (startrev + 1) * self._io.size
1727 end += (endrev + 1) * self._io.size
1723 end += (endrev + 1) * self._io.size
1728 length = end - start
1724 length = end - start
1729
1725
1730 return start, self._getsegment(start, length, df=df)
1726 return start, self._getsegment(start, length, df=df)
1731
1727
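Each index entry stores the data-file offset in the high bits of its first field (see `offset_type`) and the compressed length in its second, so the segment for a run of revisions stretches from the first revision's start to the last revision's end, plus the interleaved index entries for inline revlogs. A toy sketch of that computation, with a made-up index and an assumed per-entry size:

def segment_range(index, startrev, endrev, inline=False, entry_size=64):
    # index entries here are toy (offset_and_flags, compressed_length) pairs;
    # entry_size=64 is only an assumed index entry size for the inline case
    start = index[startrev][0] >> 16
    end = (index[endrev][0] >> 16) + index[endrev][1]
    if inline:
        # inline revlogs interleave one index entry before each data chunk
        start += (startrev + 1) * entry_size
        end += (endrev + 1) * entry_size
    return start, end - start

index = [(0 << 16, 120), (120 << 16, 30), (150 << 16, 45)]
print(segment_range(index, 0, 2))  # (0, 195)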
1732 def _chunk(self, rev, df=None):
1728 def _chunk(self, rev, df=None):
1733 """Obtain a single decompressed chunk for a revision.
1729 """Obtain a single decompressed chunk for a revision.
1734
1730
1735 Accepts an integer revision and an optional already-open file handle
1731 Accepts an integer revision and an optional already-open file handle
1736 to be used for reading. If used, the seek position of the file will not
1732 to be used for reading. If used, the seek position of the file will not
1737 be preserved.
1733 be preserved.
1738
1734
1739 Returns a str holding uncompressed data for the requested revision.
1735 Returns a str holding uncompressed data for the requested revision.
1740 """
1736 """
1741 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1737 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1742
1738
1743 def _chunks(self, revs, df=None, targetsize=None):
1739 def _chunks(self, revs, df=None, targetsize=None):
1744 """Obtain decompressed chunks for the specified revisions.
1740 """Obtain decompressed chunks for the specified revisions.
1745
1741
1746 Accepts an iterable of numeric revisions that are assumed to be in
1742 Accepts an iterable of numeric revisions that are assumed to be in
1747 ascending order. Also accepts an optional already-open file handle
1743 ascending order. Also accepts an optional already-open file handle
1748 to be used for reading. If used, the seek position of the file will
1744 to be used for reading. If used, the seek position of the file will
1749 not be preserved.
1745 not be preserved.
1750
1746
1751 This function is similar to calling ``self._chunk()`` multiple times,
1747 This function is similar to calling ``self._chunk()`` multiple times,
1752 but is faster.
1748 but is faster.
1753
1749
1754 Returns a list with decompressed data for each requested revision.
1750 Returns a list with decompressed data for each requested revision.
1755 """
1751 """
1756 if not revs:
1752 if not revs:
1757 return []
1753 return []
1758 start = self.start
1754 start = self.start
1759 length = self.length
1755 length = self.length
1760 inline = self._inline
1756 inline = self._inline
1761 iosize = self._io.size
1757 iosize = self._io.size
1762 buffer = util.buffer
1758 buffer = util.buffer
1763
1759
1764 l = []
1760 l = []
1765 ladd = l.append
1761 ladd = l.append
1766
1762
1767 if not self._withsparseread:
1763 if not self._withsparseread:
1768 slicedchunks = (revs,)
1764 slicedchunks = (revs,)
1769 else:
1765 else:
1770 slicedchunks = deltautil.slicechunk(
1766 slicedchunks = deltautil.slicechunk(
1771 self, revs, targetsize=targetsize
1767 self, revs, targetsize=targetsize
1772 )
1768 )
1773
1769
1774 for revschunk in slicedchunks:
1770 for revschunk in slicedchunks:
1775 firstrev = revschunk[0]
1771 firstrev = revschunk[0]
1776 # Skip trailing revisions with empty diff
1772 # Skip trailing revisions with empty diff
1777 for lastrev in revschunk[::-1]:
1773 for lastrev in revschunk[::-1]:
1778 if length(lastrev) != 0:
1774 if length(lastrev) != 0:
1779 break
1775 break
1780
1776
1781 try:
1777 try:
1782 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1778 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1783 except OverflowError:
1779 except OverflowError:
1784 # issue4215 - we can't cache a run of chunks greater than
1780 # issue4215 - we can't cache a run of chunks greater than
1785 # 2G on Windows
1781 # 2G on Windows
1786 return [self._chunk(rev, df=df) for rev in revschunk]
1782 return [self._chunk(rev, df=df) for rev in revschunk]
1787
1783
1788 decomp = self.decompress
1784 decomp = self.decompress
1789 for rev in revschunk:
1785 for rev in revschunk:
1790 chunkstart = start(rev)
1786 chunkstart = start(rev)
1791 if inline:
1787 if inline:
1792 chunkstart += (rev + 1) * iosize
1788 chunkstart += (rev + 1) * iosize
1793 chunklength = length(rev)
1789 chunklength = length(rev)
1794 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1790 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1795
1791
1796 return l
1792 return l
1797
1793
1798 def _chunkclear(self):
1794 def _chunkclear(self):
1799 """Clear the raw chunk cache."""
1795 """Clear the raw chunk cache."""
1800 self._chunkcache = (0, b'')
1796 self._chunkcache = (0, b'')
1801
1797
1802 def deltaparent(self, rev):
1798 def deltaparent(self, rev):
1803 """return deltaparent of the given revision"""
1799 """return deltaparent of the given revision"""
1804 base = self.index[rev][3]
1800 base = self.index[rev][3]
1805 if base == rev:
1801 if base == rev:
1806 return nullrev
1802 return nullrev
1807 elif self._generaldelta:
1803 elif self._generaldelta:
1808 return base
1804 return base
1809 else:
1805 else:
1810 return rev - 1
1806 return rev - 1
1811
1807
1812 def issnapshot(self, rev):
1808 def issnapshot(self, rev):
1813 """tells whether rev is a snapshot"""
1809 """tells whether rev is a snapshot"""
1814 if not self._sparserevlog:
1810 if not self._sparserevlog:
1815 return self.deltaparent(rev) == nullrev
1811 return self.deltaparent(rev) == nullrev
1816 elif util.safehasattr(self.index, b'issnapshot'):
1812 elif util.safehasattr(self.index, b'issnapshot'):
1817 # directly assign the method to avoid repeating the attribute test and access
1813 # directly assign the method to avoid repeating the attribute test and access
1818 self.issnapshot = self.index.issnapshot
1814 self.issnapshot = self.index.issnapshot
1819 return self.issnapshot(rev)
1815 return self.issnapshot(rev)
1820 if rev == nullrev:
1816 if rev == nullrev:
1821 return True
1817 return True
1822 entry = self.index[rev]
1818 entry = self.index[rev]
1823 base = entry[3]
1819 base = entry[3]
1824 if base == rev:
1820 if base == rev:
1825 return True
1821 return True
1826 if base == nullrev:
1822 if base == nullrev:
1827 return True
1823 return True
1828 p1 = entry[5]
1824 p1 = entry[5]
1829 p2 = entry[6]
1825 p2 = entry[6]
1830 if base == p1 or base == p2:
1826 if base == p1 or base == p2:
1831 return False
1827 return False
1832 return self.issnapshot(base)
1828 return self.issnapshot(base)
1833
1829
1834 def snapshotdepth(self, rev):
1830 def snapshotdepth(self, rev):
1835 """number of snapshot in the chain before this one"""
1831 """number of snapshot in the chain before this one"""
1836 if not self.issnapshot(rev):
1832 if not self.issnapshot(rev):
1837 raise error.ProgrammingError(b'revision %d not a snapshot')
1833 raise error.ProgrammingError(b'revision %d not a snapshot')
1838 return len(self._deltachain(rev)[0]) - 1
1834 return len(self._deltachain(rev)[0]) - 1
1839
1835
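A snapshot's delta chain is made only of other snapshots, so its depth is simply the chain length minus one. A toy illustration of walking such a chain, using a made-up rev-to-base mapping:

nullrev = -1
base = {0: 0, 1: 0, 2: 1, 3: 2}    # toy mapping: rev -> delta base

def toy_deltachain(rev):
    # follow delta bases until a full snapshot (base == rev or nullrev)
    chain = [rev]
    while base[rev] != rev and base[rev] != nullrev:
        rev = base[rev]
        chain.append(rev)
    chain.reverse()
    return chain

print(toy_deltachain(3))           # [0, 1, 2, 3]
print(len(toy_deltachain(3)) - 1)  # depth 3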
1840 def revdiff(self, rev1, rev2):
1836 def revdiff(self, rev1, rev2):
1841 """return or calculate a delta between two revisions
1837 """return or calculate a delta between two revisions
1842
1838
1843 The delta calculated is in binary form and is intended to be written to
1839 The delta calculated is in binary form and is intended to be written to
1844 revlog data directly. So this function needs raw revision data.
1840 revlog data directly. So this function needs raw revision data.
1845 """
1841 """
1846 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1842 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1847 return bytes(self._chunk(rev2))
1843 return bytes(self._chunk(rev2))
1848
1844
1849 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1845 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1850
1846
1851 def _processflags(self, text, flags, operation, raw=False):
1847 def _processflags(self, text, flags, operation, raw=False):
1852 """deprecated entry point to access flag processors"""
1848 """deprecated entry point to access flag processors"""
1853 msg = b'_processflag(...) use the specialized variant'
1849 msg = b'_processflag(...) use the specialized variant'
1854 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1850 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1855 if raw:
1851 if raw:
1856 return text, flagutil.processflagsraw(self, text, flags)
1852 return text, flagutil.processflagsraw(self, text, flags)
1857 elif operation == b'read':
1853 elif operation == b'read':
1858 return flagutil.processflagsread(self, text, flags)
1854 return flagutil.processflagsread(self, text, flags)
1859 else: # write operation
1855 else: # write operation
1860 return flagutil.processflagswrite(self, text, flags)
1856 return flagutil.processflagswrite(self, text, flags)
1861
1857
1862 def revision(self, nodeorrev, _df=None, raw=False):
1858 def revision(self, nodeorrev, _df=None, raw=False):
1863 """return an uncompressed revision of a given node or revision
1859 """return an uncompressed revision of a given node or revision
1864 number.
1860 number.
1865
1861
1866 _df - an existing file handle to read from. (internal-only)
1862 _df - an existing file handle to read from. (internal-only)
1867 raw - an optional argument specifying if the revision data is to be
1863 raw - an optional argument specifying if the revision data is to be
1868 treated as raw data when applying flag transforms. 'raw' should be set
1864 treated as raw data when applying flag transforms. 'raw' should be set
1869 to True when generating changegroups or in debug commands.
1865 to True when generating changegroups or in debug commands.
1870 """
1866 """
1871 if raw:
1867 if raw:
1872 msg = (
1868 msg = (
1873 b'revlog.revision(..., raw=True) is deprecated, '
1869 b'revlog.revision(..., raw=True) is deprecated, '
1874 b'use revlog.rawdata(...)'
1870 b'use revlog.rawdata(...)'
1875 )
1871 )
1876 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1872 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1877 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1873 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1878
1874
1879 def sidedata(self, nodeorrev, _df=None):
1875 def sidedata(self, nodeorrev, _df=None):
1880 """a map of extra data related to the changeset but not part of the hash
1876 """a map of extra data related to the changeset but not part of the hash
1881
1877
1882 This function currently returns a dictionary. However, a more advanced
1878 This function currently returns a dictionary. However, a more advanced
1883 mapping object will likely be used in the future for more
1879 mapping object will likely be used in the future for more
1884 efficient/lazy code.
1880 efficient/lazy code.
1885 """
1881 """
1886 return self._revisiondata(nodeorrev, _df)[1]
1882 return self._revisiondata(nodeorrev, _df)[1]
1887
1883
1888 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1884 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1889 # deal with <nodeorrev> argument type
1885 # deal with <nodeorrev> argument type
1890 if isinstance(nodeorrev, int):
1886 if isinstance(nodeorrev, int):
1891 rev = nodeorrev
1887 rev = nodeorrev
1892 node = self.node(rev)
1888 node = self.node(rev)
1893 else:
1889 else:
1894 node = nodeorrev
1890 node = nodeorrev
1895 rev = None
1891 rev = None
1896
1892
1897 # fast path the special `nullid` rev
1893 # fast path the special `nullid` rev
1898 if node == nullid:
1894 if node == nullid:
1899 return b"", {}
1895 return b"", {}
1900
1896
1901 # ``rawtext`` is the text as stored inside the revlog. Might be the
1897 # ``rawtext`` is the text as stored inside the revlog. Might be the
1902 # revision or might need to be processed to retrieve the revision.
1898 # revision or might need to be processed to retrieve the revision.
1903 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1899 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1904
1900
1905 if self.version & 0xFFFF == REVLOGV2:
1901 if self.version & 0xFFFF == REVLOGV2:
1906 if rev is None:
1902 if rev is None:
1907 rev = self.rev(node)
1903 rev = self.rev(node)
1908 sidedata = self._sidedata(rev)
1904 sidedata = self._sidedata(rev)
1909 else:
1905 else:
1910 sidedata = {}
1906 sidedata = {}
1911
1907
1912 if raw and validated:
1908 if raw and validated:
1913 # if we don't want to process the raw text and the raw
1909 # if we don't want to process the raw text and the raw
1914 # text is already cached, we can exit early.
1910 # text is already cached, we can exit early.
1915 return rawtext, sidedata
1911 return rawtext, sidedata
1916 if rev is None:
1912 if rev is None:
1917 rev = self.rev(node)
1913 rev = self.rev(node)
1918 # the revlog's flag for this revision
1914 # the revlog's flag for this revision
1919 # (usually alter its state or content)
1915 # (usually alter its state or content)
1920 flags = self.flags(rev)
1916 flags = self.flags(rev)
1921
1917
1922 if validated and flags == REVIDX_DEFAULT_FLAGS:
1918 if validated and flags == REVIDX_DEFAULT_FLAGS:
1923 # no extra flags set, no flag processor runs, text = rawtext
1919 # no extra flags set, no flag processor runs, text = rawtext
1924 return rawtext, sidedata
1920 return rawtext, sidedata
1925
1921
1926 if raw:
1922 if raw:
1927 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1923 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1928 text = rawtext
1924 text = rawtext
1929 else:
1925 else:
1930 r = flagutil.processflagsread(self, rawtext, flags)
1926 r = flagutil.processflagsread(self, rawtext, flags)
1931 text, validatehash = r
1927 text, validatehash = r
1932 if validatehash:
1928 if validatehash:
1933 self.checkhash(text, node, rev=rev)
1929 self.checkhash(text, node, rev=rev)
1934 if not validated:
1930 if not validated:
1935 self._revisioncache = (node, rev, rawtext)
1931 self._revisioncache = (node, rev, rawtext)
1936
1932
1937 return text, sidedata
1933 return text, sidedata
1938
1934
1939 def _rawtext(self, node, rev, _df=None):
1935 def _rawtext(self, node, rev, _df=None):
1940 """return the possibly unvalidated rawtext for a revision
1936 """return the possibly unvalidated rawtext for a revision
1941
1937
1942 returns (rev, rawtext, validated)
1938 returns (rev, rawtext, validated)
1943 """
1939 """
1944
1940
1945 # revision in the cache (could be useful to apply delta)
1941 # revision in the cache (could be useful to apply delta)
1946 cachedrev = None
1942 cachedrev = None
1947 # An intermediate text to apply deltas to
1943 # An intermediate text to apply deltas to
1948 basetext = None
1944 basetext = None
1949
1945
1950 # Check if we have the entry in cache
1946 # Check if we have the entry in cache
1951 # The cache entry looks like (node, rev, rawtext)
1947 # The cache entry looks like (node, rev, rawtext)
1952 if self._revisioncache:
1948 if self._revisioncache:
1953 if self._revisioncache[0] == node:
1949 if self._revisioncache[0] == node:
1954 return (rev, self._revisioncache[2], True)
1950 return (rev, self._revisioncache[2], True)
1955 cachedrev = self._revisioncache[1]
1951 cachedrev = self._revisioncache[1]
1956
1952
1957 if rev is None:
1953 if rev is None:
1958 rev = self.rev(node)
1954 rev = self.rev(node)
1959
1955
1960 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1956 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1961 if stopped:
1957 if stopped:
1962 basetext = self._revisioncache[2]
1958 basetext = self._revisioncache[2]
1963
1959
1964 # drop cache to save memory, the caller is expected to
1960 # drop cache to save memory, the caller is expected to
1965 # update self._revisioncache after validating the text
1961 # update self._revisioncache after validating the text
1966 self._revisioncache = None
1962 self._revisioncache = None
1967
1963
1968 targetsize = None
1964 targetsize = None
1969 rawsize = self.index[rev][2]
1965 rawsize = self.index[rev][2]
1970 if 0 <= rawsize:
1966 if 0 <= rawsize:
1971 targetsize = 4 * rawsize
1967 targetsize = 4 * rawsize
1972
1968
1973 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1969 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1974 if basetext is None:
1970 if basetext is None:
1975 basetext = bytes(bins[0])
1971 basetext = bytes(bins[0])
1976 bins = bins[1:]
1972 bins = bins[1:]
1977
1973
1978 rawtext = mdiff.patches(basetext, bins)
1974 rawtext = mdiff.patches(basetext, bins)
1979 del basetext # let us have a chance to free memory early
1975 del basetext # let us have a chance to free memory early
1980 return (rev, rawtext, False)
1976 return (rev, rawtext, False)
1981
1977
1982 def _sidedata(self, rev):
1978 def _sidedata(self, rev):
1983 """Return the sidedata for a given revision number."""
1979 """Return the sidedata for a given revision number."""
1984 index_entry = self.index[rev]
1980 index_entry = self.index[rev]
1985 sidedata_offset = index_entry[8]
1981 sidedata_offset = index_entry[8]
1986 sidedata_size = index_entry[9]
1982 sidedata_size = index_entry[9]
1987
1983
1988 if self._inline:
1984 if self._inline:
1989 sidedata_offset += self._io.size * (1 + rev)
1985 sidedata_offset += self._io.size * (1 + rev)
1990 if sidedata_size == 0:
1986 if sidedata_size == 0:
1991 return {}
1987 return {}
1992
1988
1993 segment = self._getsegment(sidedata_offset, sidedata_size)
1989 segment = self._getsegment(sidedata_offset, sidedata_size)
1994 sidedata = sidedatautil.deserialize_sidedata(segment)
1990 sidedata = sidedatautil.deserialize_sidedata(segment)
1995 return sidedata
1991 return sidedata
1996
1992
1997 def rawdata(self, nodeorrev, _df=None):
1993 def rawdata(self, nodeorrev, _df=None):
1998 """return an uncompressed raw data of a given node or revision number.
1994 """return an uncompressed raw data of a given node or revision number.
1999
1995
2000 _df - an existing file handle to read from. (internal-only)
1996 _df - an existing file handle to read from. (internal-only)
2001 """
1997 """
2002 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1998 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2003
1999
2004 def hash(self, text, p1, p2):
2000 def hash(self, text, p1, p2):
2005 """Compute a node hash.
2001 """Compute a node hash.
2006
2002
2007 Available as a function so that subclasses can replace the hash
2003 Available as a function so that subclasses can replace the hash
2008 as needed.
2004 as needed.
2009 """
2005 """
2010 return storageutil.hashrevisionsha1(text, p1, p2)
2006 return storageutil.hashrevisionsha1(text, p1, p2)
2011
2007
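For reference, the conventional revlog node hash is assumed here to be the SHA-1 of the two parent nodes in sorted order followed by the revision text; a standalone sketch of that scheme (not the canonical implementation, which lives in storageutil.hashrevisionsha1):

import hashlib

def node_hash(text, p1, p2):
    # assumed scheme: SHA-1 over the two parent nodes in sorted order,
    # then the revision text
    a, b = sorted([p1, p2])
    return hashlib.sha1(a + b + text).hexdigest()

null20 = b'\0' * 20                       # stand-in for the null node id
print(node_hash(b'hello\n', null20, null20))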
2012 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2008 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2013 """Check node hash integrity.
2009 """Check node hash integrity.
2014
2010
2015 Available as a function so that subclasses can extend hash mismatch
2011 Available as a function so that subclasses can extend hash mismatch
2016 behaviors as needed.
2012 behaviors as needed.
2017 """
2013 """
2018 try:
2014 try:
2019 if p1 is None and p2 is None:
2015 if p1 is None and p2 is None:
2020 p1, p2 = self.parents(node)
2016 p1, p2 = self.parents(node)
2021 if node != self.hash(text, p1, p2):
2017 if node != self.hash(text, p1, p2):
2022 # Clear the revision cache on hash failure. The revision cache
2018 # Clear the revision cache on hash failure. The revision cache
2023 # only stores the raw revision and clearing the cache does have
2019 # only stores the raw revision and clearing the cache does have
2024 # the side-effect that we won't have a cache hit when the raw
2020 # the side-effect that we won't have a cache hit when the raw
2025 # revision data is accessed. But this case should be rare and
2021 # revision data is accessed. But this case should be rare and
2026 # it is extra work to teach the cache about the hash
2022 # it is extra work to teach the cache about the hash
2027 # verification state.
2023 # verification state.
2028 if self._revisioncache and self._revisioncache[0] == node:
2024 if self._revisioncache and self._revisioncache[0] == node:
2029 self._revisioncache = None
2025 self._revisioncache = None
2030
2026
2031 revornode = rev
2027 revornode = rev
2032 if revornode is None:
2028 if revornode is None:
2033 revornode = templatefilters.short(hex(node))
2029 revornode = templatefilters.short(hex(node))
2034 raise error.RevlogError(
2030 raise error.RevlogError(
2035 _(b"integrity check failed on %s:%s")
2031 _(b"integrity check failed on %s:%s")
2036 % (self.indexfile, pycompat.bytestr(revornode))
2032 % (self.indexfile, pycompat.bytestr(revornode))
2037 )
2033 )
2038 except error.RevlogError:
2034 except error.RevlogError:
2039 if self._censorable and storageutil.iscensoredtext(text):
2035 if self._censorable and storageutil.iscensoredtext(text):
2040 raise error.CensoredNodeError(self.indexfile, node, text)
2036 raise error.CensoredNodeError(self.indexfile, node, text)
2041 raise
2037 raise
2042
2038
2043 def _enforceinlinesize(self, tr, fp=None):
2039 def _enforceinlinesize(self, tr, fp=None):
2044 """Check if the revlog is too big for inline and convert if so.
2040 """Check if the revlog is too big for inline and convert if so.
2045
2041
2046 This should be called after revisions are added to the revlog. If the
2042 This should be called after revisions are added to the revlog. If the
2047 revlog has grown too large to be an inline revlog, it will convert it
2043 revlog has grown too large to be an inline revlog, it will convert it
2048 to use multiple index and data files.
2044 to use multiple index and data files.
2049 """
2045 """
2050 tiprev = len(self) - 1
2046 tiprev = len(self) - 1
2051 if (
2047 if (
2052 not self._inline
2048 not self._inline
2053 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2049 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2054 ):
2050 ):
2055 return
2051 return
2056
2052
2057 troffset = tr.findoffset(self.indexfile)
2053 troffset = tr.findoffset(self.indexfile)
2058 if troffset is None:
2054 if troffset is None:
2059 raise error.RevlogError(
2055 raise error.RevlogError(
2060 _(b"%s not found in the transaction") % self.indexfile
2056 _(b"%s not found in the transaction") % self.indexfile
2061 )
2057 )
2062 trindex = 0
2058 trindex = 0
2063 tr.add(self.datafile, 0)
2059 tr.add(self.datafile, 0)
2064
2060
2065 if fp:
2061 if fp:
2066 fp.flush()
2062 fp.flush()
2067 fp.close()
2063 fp.close()
2068 # We can't use the cached file handle after close(). So prevent
2064 # We can't use the cached file handle after close(). So prevent
2069 # its usage.
2065 # its usage.
2070 self._writinghandles = None
2066 self._writinghandles = None
2071
2067
2072 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2068 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2073 for r in self:
2069 for r in self:
2074 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2070 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2075 if troffset <= self.start(r):
2071 if troffset <= self.start(r):
2076 trindex = r
2072 trindex = r
2077
2073
2078 with self._indexfp(b'w') as fp:
2074 with self._indexfp(b'w') as fp:
2079 self.version &= ~FLAG_INLINE_DATA
2075 self.version &= ~FLAG_INLINE_DATA
2080 self._inline = False
2076 self._inline = False
2081 io = self._io
2077 io = self._io
2082 for i in self:
2078 for i in self:
2083 e = io.packentry(self.index[i], self.node, self.version, i)
2079 e = io.packentry(self.index[i], self.node, self.version, i)
2084 fp.write(e)
2080 fp.write(e)
2085
2081
2086 # the temp file replaces the real index when we exit the context
2082 # the temp file replaces the real index when we exit the context
2087 # manager
2083 # manager
2088
2084
2089 tr.replace(self.indexfile, trindex * self._io.size)
2085 tr.replace(self.indexfile, trindex * self._io.size)
2090 nodemaputil.setup_persistent_nodemap(tr, self)
2086 nodemaputil.setup_persistent_nodemap(tr, self)
2091 self._chunkclear()
2087 self._chunkclear()
2092
2088
2093 def _nodeduplicatecallback(self, transaction, node):
2089 def _nodeduplicatecallback(self, transaction, node):
2094 """called when trying to add a node already stored."""
2090 """called when trying to add a node already stored."""
2095
2091
2096 def addrevision(
2092 def addrevision(
2097 self,
2093 self,
2098 text,
2094 text,
2099 transaction,
2095 transaction,
2100 link,
2096 link,
2101 p1,
2097 p1,
2102 p2,
2098 p2,
2103 cachedelta=None,
2099 cachedelta=None,
2104 node=None,
2100 node=None,
2105 flags=REVIDX_DEFAULT_FLAGS,
2101 flags=REVIDX_DEFAULT_FLAGS,
2106 deltacomputer=None,
2102 deltacomputer=None,
2107 sidedata=None,
2103 sidedata=None,
2108 ):
2104 ):
2109 """add a revision to the log
2105 """add a revision to the log
2110
2106
2111 text - the revision data to add
2107 text - the revision data to add
2112 transaction - the transaction object used for rollback
2108 transaction - the transaction object used for rollback
2113 link - the linkrev data to add
2109 link - the linkrev data to add
2114 p1, p2 - the parent nodeids of the revision
2110 p1, p2 - the parent nodeids of the revision
2115 cachedelta - an optional precomputed delta
2111 cachedelta - an optional precomputed delta
2116 node - nodeid of revision; typically node is not specified, and it is
2112 node - nodeid of revision; typically node is not specified, and it is
2117 computed by default as hash(text, p1, p2); however, subclasses might
2113 computed by default as hash(text, p1, p2); however, subclasses might
2118 use a different hashing method (and override checkhash() in such a case)
2114 use a different hashing method (and override checkhash() in such a case)
2119 flags - the known flags to set on the revision
2115 flags - the known flags to set on the revision
2120 deltacomputer - an optional deltacomputer instance shared between
2116 deltacomputer - an optional deltacomputer instance shared between
2121 multiple calls
2117 multiple calls
2122 """
2118 """
2123 if link == nullrev:
2119 if link == nullrev:
2124 raise error.RevlogError(
2120 raise error.RevlogError(
2125 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2121 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2126 )
2122 )
2127
2123
2128 if sidedata is None:
2124 if sidedata is None:
2129 sidedata = {}
2125 sidedata = {}
2130 elif not self.hassidedata:
2126 elif not self.hassidedata:
2131 raise error.ProgrammingError(
2127 raise error.ProgrammingError(
2132 _(b"trying to add sidedata to a revlog who don't support them")
2128 _(b"trying to add sidedata to a revlog who don't support them")
2133 )
2129 )
2134
2130
2135 if flags:
2131 if flags:
2136 node = node or self.hash(text, p1, p2)
2132 node = node or self.hash(text, p1, p2)
2137
2133
2138 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2134 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2139
2135
2140 # If the flag processor modifies the revision data, ignore any provided
2136 # If the flag processor modifies the revision data, ignore any provided
2141 # cachedelta.
2137 # cachedelta.
2142 if rawtext != text:
2138 if rawtext != text:
2143 cachedelta = None
2139 cachedelta = None
2144
2140
2145 if len(rawtext) > _maxentrysize:
2141 if len(rawtext) > _maxentrysize:
2146 raise error.RevlogError(
2142 raise error.RevlogError(
2147 _(
2143 _(
2148 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2144 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2149 )
2145 )
2150 % (self.indexfile, len(rawtext))
2146 % (self.indexfile, len(rawtext))
2151 )
2147 )
2152
2148
2153 node = node or self.hash(rawtext, p1, p2)
2149 node = node or self.hash(rawtext, p1, p2)
2154 rev = self.index.get_rev(node)
2150 rev = self.index.get_rev(node)
2155 if rev is not None:
2151 if rev is not None:
2156 return rev
2152 return rev
2157
2153
2158 if validatehash:
2154 if validatehash:
2159 self.checkhash(rawtext, node, p1=p1, p2=p2)
2155 self.checkhash(rawtext, node, p1=p1, p2=p2)
2160
2156
2161 return self.addrawrevision(
2157 return self.addrawrevision(
2162 rawtext,
2158 rawtext,
2163 transaction,
2159 transaction,
2164 link,
2160 link,
2165 p1,
2161 p1,
2166 p2,
2162 p2,
2167 node,
2163 node,
2168 flags,
2164 flags,
2169 cachedelta=cachedelta,
2165 cachedelta=cachedelta,
2170 deltacomputer=deltacomputer,
2166 deltacomputer=deltacomputer,
2171 sidedata=sidedata,
2167 sidedata=sidedata,
2172 )
2168 )
2173
2169
2174 def addrawrevision(
2170 def addrawrevision(
2175 self,
2171 self,
2176 rawtext,
2172 rawtext,
2177 transaction,
2173 transaction,
2178 link,
2174 link,
2179 p1,
2175 p1,
2180 p2,
2176 p2,
2181 node,
2177 node,
2182 flags,
2178 flags,
2183 cachedelta=None,
2179 cachedelta=None,
2184 deltacomputer=None,
2180 deltacomputer=None,
2185 sidedata=None,
2181 sidedata=None,
2186 ):
2182 ):
2187 """add a raw revision with known flags, node and parents
2183 """add a raw revision with known flags, node and parents
2188 useful when reusing a revision not stored in this revlog (ex: received
2184 useful when reusing a revision not stored in this revlog (ex: received
2189 over the wire, or read from an external bundle).
2185 over the wire, or read from an external bundle).
2190 """
2186 """
2191 dfh = None
2187 dfh = None
2192 if not self._inline:
2188 if not self._inline:
2193 dfh = self._datafp(b"a+")
2189 dfh = self._datafp(b"a+")
2194 ifh = self._indexfp(b"a+")
2190 ifh = self._indexfp(b"a+")
2195 try:
2191 try:
2196 return self._addrevision(
2192 return self._addrevision(
2197 node,
2193 node,
2198 rawtext,
2194 rawtext,
2199 transaction,
2195 transaction,
2200 link,
2196 link,
2201 p1,
2197 p1,
2202 p2,
2198 p2,
2203 flags,
2199 flags,
2204 cachedelta,
2200 cachedelta,
2205 ifh,
2201 ifh,
2206 dfh,
2202 dfh,
2207 deltacomputer=deltacomputer,
2203 deltacomputer=deltacomputer,
2208 sidedata=sidedata,
2204 sidedata=sidedata,
2209 )
2205 )
2210 finally:
2206 finally:
2211 if dfh:
2207 if dfh:
2212 dfh.close()
2208 dfh.close()
2213 ifh.close()
2209 ifh.close()
2214
2210
2215 def compress(self, data):
2211 def compress(self, data):
2216 """Generate a possibly-compressed representation of data."""
2212 """Generate a possibly-compressed representation of data."""
2217 if not data:
2213 if not data:
2218 return b'', data
2214 return b'', data
2219
2215
2220 compressed = self._compressor.compress(data)
2216 compressed = self._compressor.compress(data)
2221
2217
2222 if compressed:
2218 if compressed:
2223 # The revlog compressor added the header in the returned data.
2219 # The revlog compressor added the header in the returned data.
2224 return b'', compressed
2220 return b'', compressed
2225
2221
2226 if data[0:1] == b'\0':
2222 if data[0:1] == b'\0':
2227 return b'', data
2223 return b'', data
2228 return b'u', data
2224 return b'u', data
2229
2225
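A rough standalone approximation of that decision, using zlib directly and assuming the engine is only worth keeping when its output is smaller than the input:

import zlib

def store(data):
    # keep engine output only when it actually shrinks the data; otherwise
    # store verbatim with a b'u' marker (or no marker when data starts with NUL)
    if not data:
        return b'', data
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return b'', compressed      # zlib output starts with b'x', self-identifying
    if data[0:1] == b'\0':
        return b'', data
    return b'u', data

header, blob = store(b'abc' * 100)
print(header, blob[0:1])            # b'' b'x'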
2230 def decompress(self, data):
2226 def decompress(self, data):
2231 """Decompress a revlog chunk.
2227 """Decompress a revlog chunk.
2232
2228
2233 The chunk is expected to begin with a header identifying the
2229 The chunk is expected to begin with a header identifying the
2234 format type so it can be routed to an appropriate decompressor.
2230 format type so it can be routed to an appropriate decompressor.
2235 """
2231 """
2236 if not data:
2232 if not data:
2237 return data
2233 return data
2238
2234
2239 # Revlogs are read much more frequently than they are written and many
2235 # Revlogs are read much more frequently than they are written and many
2240 # chunks only take microseconds to decompress, so performance is
2236 # chunks only take microseconds to decompress, so performance is
2241 # important here.
2237 # important here.
2242 #
2238 #
2243 # We can make a few assumptions about revlogs:
2239 # We can make a few assumptions about revlogs:
2244 #
2240 #
2245 # 1) the majority of chunks will be compressed (as opposed to inline
2241 # 1) the majority of chunks will be compressed (as opposed to inline
2246 # raw data).
2242 # raw data).
2247 # 2) decompressing *any* data will likely be at least 10x slower than
2243 # 2) decompressing *any* data will likely be at least 10x slower than
2248 # returning raw inline data.
2244 # returning raw inline data.
2249 # 3) we want to prioritize common and officially supported compression
2245 # 3) we want to prioritize common and officially supported compression
2250 # engines
2246 # engines
2251 #
2247 #
2252 # It follows that we want to optimize for "decompress compressed data
2248 # It follows that we want to optimize for "decompress compressed data
2253 # when encoded with common and officially supported compression engines"
2249 # when encoded with common and officially supported compression engines"
2254 # case over "raw data" and "data encoded by less common or non-official
2250 # case over "raw data" and "data encoded by less common or non-official
2255 # compression engines." That is why we have the inline lookup first
2251 # compression engines." That is why we have the inline lookup first
2256 # followed by the compengines lookup.
2252 # followed by the compengines lookup.
2257 #
2253 #
2258 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2254 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2259 # compressed chunks. And this matters for changelog and manifest reads.
2255 # compressed chunks. And this matters for changelog and manifest reads.
2260 t = data[0:1]
2256 t = data[0:1]
2261
2257
2262 if t == b'x':
2258 if t == b'x':
2263 try:
2259 try:
2264 return _zlibdecompress(data)
2260 return _zlibdecompress(data)
2265 except zlib.error as e:
2261 except zlib.error as e:
2266 raise error.RevlogError(
2262 raise error.RevlogError(
2267 _(b'revlog decompress error: %s')
2263 _(b'revlog decompress error: %s')
2268 % stringutil.forcebytestr(e)
2264 % stringutil.forcebytestr(e)
2269 )
2265 )
2270 # '\0' is more common than 'u' so it goes first.
2266 # '\0' is more common than 'u' so it goes first.
2271 elif t == b'\0':
2267 elif t == b'\0':
2272 return data
2268 return data
2273 elif t == b'u':
2269 elif t == b'u':
2274 return util.buffer(data, 1)
2270 return util.buffer(data, 1)
2275
2271
2276 try:
2272 try:
2277 compressor = self._decompressors[t]
2273 compressor = self._decompressors[t]
2278 except KeyError:
2274 except KeyError:
2279 try:
2275 try:
2280 engine = util.compengines.forrevlogheader(t)
2276 engine = util.compengines.forrevlogheader(t)
2281 compressor = engine.revlogcompressor(self._compengineopts)
2277 compressor = engine.revlogcompressor(self._compengineopts)
2282 self._decompressors[t] = compressor
2278 self._decompressors[t] = compressor
2283 except KeyError:
2279 except KeyError:
2284 raise error.RevlogError(
2280 raise error.RevlogError(
2285 _(b'unknown compression type %s') % binascii.hexlify(t)
2281 _(b'unknown compression type %s') % binascii.hexlify(t)
2286 )
2282 )
2287
2283
2288 return compressor.decompress(data)
2284 return compressor.decompress(data)
2289
2285
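A minimal sketch of that first-byte dispatch, modelling only the zlib ('x'), raw ('\0') and verbatim ('u') cases:

import zlib

def undo_store(data):
    # dispatch on the first byte, as decompress() does for its common cases
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'\0':
        return data
    if t == b'u':
        return data[1:]
    raise ValueError('unknown compression header %r' % t)

assert undo_store(zlib.compress(b'spam')) == b'spam'
assert undo_store(b'uham') == b'ham'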
2290 def _addrevision(
2286 def _addrevision(
2291 self,
2287 self,
2292 node,
2288 node,
2293 rawtext,
2289 rawtext,
2294 transaction,
2290 transaction,
2295 link,
2291 link,
2296 p1,
2292 p1,
2297 p2,
2293 p2,
2298 flags,
2294 flags,
2299 cachedelta,
2295 cachedelta,
2300 ifh,
2296 ifh,
2301 dfh,
2297 dfh,
2302 alwayscache=False,
2298 alwayscache=False,
2303 deltacomputer=None,
2299 deltacomputer=None,
2304 sidedata=None,
2300 sidedata=None,
2305 ):
2301 ):
2306 """internal function to add revisions to the log
2302 """internal function to add revisions to the log
2307
2303
2308 see addrevision for argument descriptions.
2304 see addrevision for argument descriptions.
2309
2305
2310 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2306 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2311
2307
2312 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2308 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2313 be used.
2309 be used.
2314
2310
2315 invariants:
2311 invariants:
2316 - rawtext is optional (can be None); if not set, cachedelta must be set.
2312 - rawtext is optional (can be None); if not set, cachedelta must be set.
2317 if both are set, they must correspond to each other.
2313 if both are set, they must correspond to each other.
2318 """
2314 """
2319 if node == nullid:
2315 if node == nullid:
2320 raise error.RevlogError(
2316 raise error.RevlogError(
2321 _(b"%s: attempt to add null revision") % self.indexfile
2317 _(b"%s: attempt to add null revision") % self.indexfile
2322 )
2318 )
2323 if node == wdirid or node in wdirfilenodeids:
2319 if node == wdirid or node in wdirfilenodeids:
2324 raise error.RevlogError(
2320 raise error.RevlogError(
2325 _(b"%s: attempt to add wdir revision") % self.indexfile
2321 _(b"%s: attempt to add wdir revision") % self.indexfile
2326 )
2322 )
2327
2323
2328 if self._inline:
2324 if self._inline:
2329 fh = ifh
2325 fh = ifh
2330 else:
2326 else:
2331 fh = dfh
2327 fh = dfh
2332
2328
2333 btext = [rawtext]
2329 btext = [rawtext]
2334
2330
2335 curr = len(self)
2331 curr = len(self)
2336 prev = curr - 1
2332 prev = curr - 1
2337
2333
2338 offset = self._get_data_offset(prev)
2334 offset = self._get_data_offset(prev)
2339
2335
2340 if self._concurrencychecker:
2336 if self._concurrencychecker:
2341 if self._inline:
2337 if self._inline:
2342 # offset is "as if" it were in the .d file, so we need to add on
2338 # offset is "as if" it were in the .d file, so we need to add on
2343 # the size of the entry metadata.
2339 # the size of the entry metadata.
2344 self._concurrencychecker(
2340 self._concurrencychecker(
2345 ifh, self.indexfile, offset + curr * self._io.size
2341 ifh, self.indexfile, offset + curr * self._io.size
2346 )
2342 )
2347 else:
2343 else:
2348 # Entries in the .i are a consistent size.
2344 # Entries in the .i are a consistent size.
2349 self._concurrencychecker(
2345 self._concurrencychecker(
2350 ifh, self.indexfile, curr * self._io.size
2346 ifh, self.indexfile, curr * self._io.size
2351 )
2347 )
2352 self._concurrencychecker(dfh, self.datafile, offset)
2348 self._concurrencychecker(dfh, self.datafile, offset)
2353
2349
2354 p1r, p2r = self.rev(p1), self.rev(p2)
2350 p1r, p2r = self.rev(p1), self.rev(p2)
2355
2351
2356 # full versions are inserted when the needed deltas
2352 # full versions are inserted when the needed deltas
2357 # become comparable to the uncompressed text
2353 # become comparable to the uncompressed text
2358 if rawtext is None:
2354 if rawtext is None:
2359 # need the rawtext size before it is changed by flag processors, which is
2355 # need the rawtext size before it is changed by flag processors, which is
2360 # the non-raw size. use revlog explicitly to avoid filelog's extra
2356 # the non-raw size. use revlog explicitly to avoid filelog's extra
2361 # logic that might remove metadata size.
2357 # logic that might remove metadata size.
2362 textlen = mdiff.patchedsize(
2358 textlen = mdiff.patchedsize(
2363 revlog.size(self, cachedelta[0]), cachedelta[1]
2359 revlog.size(self, cachedelta[0]), cachedelta[1]
2364 )
2360 )
2365 else:
2361 else:
2366 textlen = len(rawtext)
2362 textlen = len(rawtext)
2367
2363
2368 if deltacomputer is None:
2364 if deltacomputer is None:
2369 deltacomputer = deltautil.deltacomputer(self)
2365 deltacomputer = deltautil.deltacomputer(self)
2370
2366
2371 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2367 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2372
2368
2373 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2369 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2374
2370
2375 if sidedata:
2371 if sidedata:
2376 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2372 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2377 sidedata_offset = offset + deltainfo.deltalen
2373 sidedata_offset = offset + deltainfo.deltalen
2378 else:
2374 else:
2379 serialized_sidedata = b""
2375 serialized_sidedata = b""
2380 # Don't store the offset if the sidedata is empty; that way
2376 # Don't store the offset if the sidedata is empty; that way
2381 # empty sidedata is easy to detect and is no different
2377 # empty sidedata is easy to detect and is no different
2382 # from sidedata we add manually.
2378 # from sidedata we add manually.
2383 sidedata_offset = 0
2379 sidedata_offset = 0
2384
2380
2385 e = (
2381 e = (
2386 offset_type(offset, flags),
2382 offset_type(offset, flags),
2387 deltainfo.deltalen,
2383 deltainfo.deltalen,
2388 textlen,
2384 textlen,
2389 deltainfo.base,
2385 deltainfo.base,
2390 link,
2386 link,
2391 p1r,
2387 p1r,
2392 p2r,
2388 p2r,
2393 node,
2389 node,
2394 sidedata_offset,
2390 sidedata_offset,
2395 len(serialized_sidedata),
2391 len(serialized_sidedata),
2396 )
2392 )
2397
2393
2398 if self.version & 0xFFFF != REVLOGV2:
2394 if self.version & 0xFFFF != REVLOGV2:
2399 e = e[:8]
2395 e = e[:8]
2400
2396
2401 self.index.append(e)
2397 self.index.append(e)
2402 entry = self._io.packentry(e, self.node, self.version, curr)
2398 entry = self._io.packentry(e, self.node, self.version, curr)
2403 self._writeentry(
2399 self._writeentry(
2404 transaction,
2400 transaction,
2405 ifh,
2401 ifh,
2406 dfh,
2402 dfh,
2407 entry,
2403 entry,
2408 deltainfo.data,
2404 deltainfo.data,
2409 link,
2405 link,
2410 offset,
2406 offset,
2411 serialized_sidedata,
2407 serialized_sidedata,
2412 )
2408 )
2413
2409
2414 rawtext = btext[0]
2410 rawtext = btext[0]
2415
2411
2416 if alwayscache and rawtext is None:
2412 if alwayscache and rawtext is None:
2417 rawtext = deltacomputer.buildtext(revinfo, fh)
2413 rawtext = deltacomputer.buildtext(revinfo, fh)
2418
2414
2419 if type(rawtext) == bytes: # only accept immutable objects
2415 if type(rawtext) == bytes: # only accept immutable objects
2420 self._revisioncache = (node, curr, rawtext)
2416 self._revisioncache = (node, curr, rawtext)
2421 self._chainbasecache[curr] = deltainfo.chainbase
2417 self._chainbasecache[curr] = deltainfo.chainbase
2422 return curr
2418 return curr
2423
2419
2424 def _get_data_offset(self, prev):
2420 def _get_data_offset(self, prev):
2425 """Returns the current offset in the (in-transaction) data file.
2421 """Returns the current offset in the (in-transaction) data file.
2426 Versions < 2 of the revlog can get this in O(1), while revlog v2 needs a docket
2422 Versions < 2 of the revlog can get this in O(1), while revlog v2 needs a docket
2427 file to store that information: since sidedata can be rewritten to the
2423 file to store that information: since sidedata can be rewritten to the
2428 end of the data file within a transaction, you can have cases where, for
2424 end of the data file within a transaction, you can have cases where, for
2429 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2425 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2430 to `n - 1`'s sidedata being written after `n`'s data.
2426 to `n - 1`'s sidedata being written after `n`'s data.
2431
2427
2432 TODO cache this in a docket file before getting out of experimental."""
2428 TODO cache this in a docket file before getting out of experimental."""
2433 if self.version & 0xFFFF != REVLOGV2:
2429 if self.version & 0xFFFF != REVLOGV2:
2434 return self.end(prev)
2430 return self.end(prev)
2435
2431
2436 offset = 0
2432 offset = 0
2437 for rev, entry in enumerate(self.index):
2433 for rev, entry in enumerate(self.index):
2438 sidedata_end = entry[8] + entry[9]
2434 sidedata_end = entry[8] + entry[9]
2439 # Sidedata for a previous rev has potentially been written after
2435 # Sidedata for a previous rev has potentially been written after
2440 # this rev's end, so take the max.
2436 # this rev's end, so take the max.
2441 offset = max(self.end(rev), offset, sidedata_end)
2437 offset = max(self.end(rev), offset, sidedata_end)
2442 return offset
2438 return offset
2443
2439
2444 def _writeentry(
2440 def _writeentry(
2445 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2441 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2446 ):
2442 ):
2447 # Files opened in a+ mode have inconsistent behavior on various
2443 # Files opened in a+ mode have inconsistent behavior on various
2448 # platforms. Windows requires that a file positioning call be made
2444 # platforms. Windows requires that a file positioning call be made
2449 # when the file handle transitions between reads and writes. See
2445 # when the file handle transitions between reads and writes. See
2450 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2446 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2451 # platforms, Python or the platform itself can be buggy. Some versions
2447 # platforms, Python or the platform itself can be buggy. Some versions
2452 # of Solaris have been observed to not append at the end of the file
2448 # of Solaris have been observed to not append at the end of the file
2453 # if the file was seeked to before the end. See issue4943 for more.
2449 # if the file was seeked to before the end. See issue4943 for more.
2454 #
2450 #
2455 # We work around this issue by inserting a seek() before writing.
2451 # We work around this issue by inserting a seek() before writing.
2456 # Note: This is likely not necessary on Python 3. However, because
2452 # Note: This is likely not necessary on Python 3. However, because
2457 # the file handle is reused for reads and may be seeked there, we need
2453 # the file handle is reused for reads and may be seeked there, we need
2458 # to be careful before changing this.
2454 # to be careful before changing this.
2459 ifh.seek(0, os.SEEK_END)
2455 ifh.seek(0, os.SEEK_END)
2460 if dfh:
2456 if dfh:
2461 dfh.seek(0, os.SEEK_END)
2457 dfh.seek(0, os.SEEK_END)
2462
2458
2463 curr = len(self) - 1
2459 curr = len(self) - 1
2464 if not self._inline:
2460 if not self._inline:
2465 transaction.add(self.datafile, offset)
2461 transaction.add(self.datafile, offset)
2466 transaction.add(self.indexfile, curr * len(entry))
2462 transaction.add(self.indexfile, curr * len(entry))
2467 if data[0]:
2463 if data[0]:
2468 dfh.write(data[0])
2464 dfh.write(data[0])
2469 dfh.write(data[1])
2465 dfh.write(data[1])
2470 if sidedata:
2466 if sidedata:
2471 dfh.write(sidedata)
2467 dfh.write(sidedata)
2472 ifh.write(entry)
2468 ifh.write(entry)
2473 else:
2469 else:
2474 offset += curr * self._io.size
2470 offset += curr * self._io.size
2475 transaction.add(self.indexfile, offset)
2471 transaction.add(self.indexfile, offset)
2476 ifh.write(entry)
2472 ifh.write(entry)
2477 ifh.write(data[0])
2473 ifh.write(data[0])
2478 ifh.write(data[1])
2474 ifh.write(data[1])
2479 if sidedata:
2475 if sidedata:
2480 ifh.write(sidedata)
2476 ifh.write(sidedata)
2481 self._enforceinlinesize(transaction, ifh)
2477 self._enforceinlinesize(transaction, ifh)
2482 nodemaputil.setup_persistent_nodemap(transaction, self)
2478 nodemaputil.setup_persistent_nodemap(transaction, self)
2483
2479
2484 def addgroup(
2480 def addgroup(
2485 self,
2481 self,
2486 deltas,
2482 deltas,
2487 linkmapper,
2483 linkmapper,
2488 transaction,
2484 transaction,
2489 alwayscache=False,
2485 alwayscache=False,
2490 addrevisioncb=None,
2486 addrevisioncb=None,
2491 duplicaterevisioncb=None,
2487 duplicaterevisioncb=None,
2492 ):
2488 ):
2493 """
2489 """
2494 add a delta group
2490 add a delta group
2495
2491
2496 given a set of deltas, add them to the revision log. the
2492 given a set of deltas, add them to the revision log. the
2497 first delta is against its parent, which should be in our
2493 first delta is against its parent, which should be in our
2498 log, the rest are against the previous delta.
2494 log, the rest are against the previous delta.
2499
2495
2500 If ``addrevisioncb`` is defined, it will be called with arguments of
2496 If ``addrevisioncb`` is defined, it will be called with arguments of
2501 this revlog and the node that was added.
2497 this revlog and the node that was added.
2502 """
2498 """
2503
2499
2504 if self._writinghandles:
2500 if self._writinghandles:
2505 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2501 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2506
2502
2507 r = len(self)
2503 r = len(self)
2508 end = 0
2504 end = 0
2509 if r:
2505 if r:
2510 end = self.end(r - 1)
2506 end = self.end(r - 1)
2511 ifh = self._indexfp(b"a+")
2507 ifh = self._indexfp(b"a+")
2512 isize = r * self._io.size
2508 isize = r * self._io.size
2513 if self._inline:
2509 if self._inline:
2514 transaction.add(self.indexfile, end + isize)
2510 transaction.add(self.indexfile, end + isize)
2515 dfh = None
2511 dfh = None
2516 else:
2512 else:
2517 transaction.add(self.indexfile, isize)
2513 transaction.add(self.indexfile, isize)
2518 transaction.add(self.datafile, end)
2514 transaction.add(self.datafile, end)
2519 dfh = self._datafp(b"a+")
2515 dfh = self._datafp(b"a+")
2520
2516
2521 def flush():
2517 def flush():
2522 if dfh:
2518 if dfh:
2523 dfh.flush()
2519 dfh.flush()
2524 ifh.flush()
2520 ifh.flush()
2525
2521
2526 self._writinghandles = (ifh, dfh)
2522 self._writinghandles = (ifh, dfh)
2527 empty = True
2523 empty = True
2528
2524
2529 try:
2525 try:
2530 deltacomputer = deltautil.deltacomputer(self)
2526 deltacomputer = deltautil.deltacomputer(self)
2531 # loop through our set of deltas
2527 # loop through our set of deltas
2532 for data in deltas:
2528 for data in deltas:
2533 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2529 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2534 link = linkmapper(linknode)
2530 link = linkmapper(linknode)
2535 flags = flags or REVIDX_DEFAULT_FLAGS
2531 flags = flags or REVIDX_DEFAULT_FLAGS
2536
2532
2537 rev = self.index.get_rev(node)
2533 rev = self.index.get_rev(node)
2538 if rev is not None:
2534 if rev is not None:
2539 # this can happen if two branches make the same change
2535 # this can happen if two branches make the same change
2540 self._nodeduplicatecallback(transaction, rev)
2536 self._nodeduplicatecallback(transaction, rev)
2541 if duplicaterevisioncb:
2537 if duplicaterevisioncb:
2542 duplicaterevisioncb(self, rev)
2538 duplicaterevisioncb(self, rev)
2543 empty = False
2539 empty = False
2544 continue
2540 continue
2545
2541
2546 for p in (p1, p2):
2542 for p in (p1, p2):
2547 if not self.index.has_node(p):
2543 if not self.index.has_node(p):
2548 raise error.LookupError(
2544 raise error.LookupError(
2549 p, self.indexfile, _(b'unknown parent')
2545 p, self.indexfile, _(b'unknown parent')
2550 )
2546 )
2551
2547
2552 if not self.index.has_node(deltabase):
2548 if not self.index.has_node(deltabase):
2553 raise error.LookupError(
2549 raise error.LookupError(
2554 deltabase, self.indexfile, _(b'unknown delta base')
2550 deltabase, self.indexfile, _(b'unknown delta base')
2555 )
2551 )
2556
2552
2557 baserev = self.rev(deltabase)
2553 baserev = self.rev(deltabase)
2558
2554
2559 if baserev != nullrev and self.iscensored(baserev):
2555 if baserev != nullrev and self.iscensored(baserev):
2560 # if base is censored, delta must be full replacement in a
2556 # if base is censored, delta must be full replacement in a
2561 # single patch operation
2557 # single patch operation
2562 hlen = struct.calcsize(b">lll")
2558 hlen = struct.calcsize(b">lll")
2563 oldlen = self.rawsize(baserev)
2559 oldlen = self.rawsize(baserev)
2564 newlen = len(delta) - hlen
2560 newlen = len(delta) - hlen
2565 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2561 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2566 raise error.CensoredBaseError(
2562 raise error.CensoredBaseError(
2567 self.indexfile, self.node(baserev)
2563 self.indexfile, self.node(baserev)
2568 )
2564 )
2569
2565
2570 if not flags and self._peek_iscensored(baserev, delta, flush):
2566 if not flags and self._peek_iscensored(baserev, delta, flush):
2571 flags |= REVIDX_ISCENSORED
2567 flags |= REVIDX_ISCENSORED
2572
2568
2573 # We assume consumers of addrevisioncb will want to retrieve
2569 # We assume consumers of addrevisioncb will want to retrieve
2574 # the added revision, which will require a call to
2570 # the added revision, which will require a call to
2575 # revision(). revision() will fast path if there is a cache
2571 # revision(). revision() will fast path if there is a cache
2576 # hit. So, we tell _addrevision() to always cache in this case.
2572 # hit. So, we tell _addrevision() to always cache in this case.
2577 # We're only using addgroup() in the context of changegroup
2573 # We're only using addgroup() in the context of changegroup
2578 # generation so the revision data can always be handled as raw
2574 # generation so the revision data can always be handled as raw
2579 # by the flagprocessor.
2575 # by the flagprocessor.
2580 rev = self._addrevision(
2576 rev = self._addrevision(
2581 node,
2577 node,
2582 None,
2578 None,
2583 transaction,
2579 transaction,
2584 link,
2580 link,
2585 p1,
2581 p1,
2586 p2,
2582 p2,
2587 flags,
2583 flags,
2588 (baserev, delta),
2584 (baserev, delta),
2589 ifh,
2585 ifh,
2590 dfh,
2586 dfh,
2591 alwayscache=alwayscache,
2587 alwayscache=alwayscache,
2592 deltacomputer=deltacomputer,
2588 deltacomputer=deltacomputer,
2593 sidedata=sidedata,
2589 sidedata=sidedata,
2594 )
2590 )
2595
2591
2596 if addrevisioncb:
2592 if addrevisioncb:
2597 addrevisioncb(self, rev)
2593 addrevisioncb(self, rev)
2598 empty = False
2594 empty = False
2599
2595
2600 if not dfh and not self._inline:
2596 if not dfh and not self._inline:
2601 # addrevision switched from inline to conventional
2597 # addrevision switched from inline to conventional
2602 # reopen the index
2598 # reopen the index
2603 ifh.close()
2599 ifh.close()
2604 dfh = self._datafp(b"a+")
2600 dfh = self._datafp(b"a+")
2605 ifh = self._indexfp(b"a+")
2601 ifh = self._indexfp(b"a+")
2606 self._writinghandles = (ifh, dfh)
2602 self._writinghandles = (ifh, dfh)
2607 finally:
2603 finally:
2608 self._writinghandles = None
2604 self._writinghandles = None
2609
2605
2610 if dfh:
2606 if dfh:
2611 dfh.close()
2607 dfh.close()
2612 ifh.close()
2608 ifh.close()
2613 return not empty
2609 return not empty
2614
2610
2615 def iscensored(self, rev):
2611 def iscensored(self, rev):
2616 """Check if a file revision is censored."""
2612 """Check if a file revision is censored."""
2617 if not self._censorable:
2613 if not self._censorable:
2618 return False
2614 return False
2619
2615
2620 return self.flags(rev) & REVIDX_ISCENSORED
2616 return self.flags(rev) & REVIDX_ISCENSORED
2621
2617
2622 def _peek_iscensored(self, baserev, delta, flush):
2618 def _peek_iscensored(self, baserev, delta, flush):
2623 """Quickly check if a delta produces a censored revision."""
2619 """Quickly check if a delta produces a censored revision."""
2624 if not self._censorable:
2620 if not self._censorable:
2625 return False
2621 return False
2626
2622
2627 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2623 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2628
2624
2629 def getstrippoint(self, minlink):
2625 def getstrippoint(self, minlink):
2630 """find the minimum rev that must be stripped to strip the linkrev
2626 """find the minimum rev that must be stripped to strip the linkrev
2631
2627
2632 Returns a tuple containing the minimum rev and a set of all revs that
2628 Returns a tuple containing the minimum rev and a set of all revs that
2633 have linkrevs that will be broken by this strip.
2629 have linkrevs that will be broken by this strip.
2634 """
2630 """
2635 return storageutil.resolvestripinfo(
2631 return storageutil.resolvestripinfo(
2636 minlink,
2632 minlink,
2637 len(self) - 1,
2633 len(self) - 1,
2638 self.headrevs(),
2634 self.headrevs(),
2639 self.linkrev,
2635 self.linkrev,
2640 self.parentrevs,
2636 self.parentrevs,
2641 )
2637 )
2642
2638
2643 def strip(self, minlink, transaction):
2639 def strip(self, minlink, transaction):
2644 """truncate the revlog on the first revision with a linkrev >= minlink
2640 """truncate the revlog on the first revision with a linkrev >= minlink
2645
2641
2646 This function is called when we're stripping revision minlink and
2642 This function is called when we're stripping revision minlink and
2647 its descendants from the repository.
2643 its descendants from the repository.
2648
2644
2649 We have to remove all revisions with linkrev >= minlink, because
2645 We have to remove all revisions with linkrev >= minlink, because
2650 the equivalent changelog revisions will be renumbered after the
2646 the equivalent changelog revisions will be renumbered after the
2651 strip.
2647 strip.
2652
2648
2653 So we truncate the revlog on the first of these revisions, and
2649 So we truncate the revlog on the first of these revisions, and
2654 trust that the caller has saved the revisions that shouldn't be
2650 trust that the caller has saved the revisions that shouldn't be
2655 removed and that it'll re-add them after this truncation.
2651 removed and that it'll re-add them after this truncation.
2656 """
2652 """
2657 if len(self) == 0:
2653 if len(self) == 0:
2658 return
2654 return
2659
2655
2660 rev, _ = self.getstrippoint(minlink)
2656 rev, _ = self.getstrippoint(minlink)
2661 if rev == len(self):
2657 if rev == len(self):
2662 return
2658 return
2663
2659
2664 # first truncate the files on disk
2660 # first truncate the files on disk
2665 end = self.start(rev)
2661 end = self.start(rev)
2666 if not self._inline:
2662 if not self._inline:
2667 transaction.add(self.datafile, end)
2663 transaction.add(self.datafile, end)
2668 end = rev * self._io.size
2664 end = rev * self._io.size
2669 else:
2665 else:
2670 end += rev * self._io.size
2666 end += rev * self._io.size
2671
2667
2672 transaction.add(self.indexfile, end)
2668 transaction.add(self.indexfile, end)
2673
2669
2674 # then reset internal state in memory to forget those revisions
2670 # then reset internal state in memory to forget those revisions
2675 self._revisioncache = None
2671 self._revisioncache = None
2676 self._chaininfocache = util.lrucachedict(500)
2672 self._chaininfocache = util.lrucachedict(500)
2677 self._chunkclear()
2673 self._chunkclear()
2678
2674
2679 del self.index[rev:-1]
2675 del self.index[rev:-1]
2680
2676
2681 def checksize(self):
2677 def checksize(self):
2682 """Check size of index and data files
2678 """Check size of index and data files
2683
2679
2684 return a (dd, di) tuple.
2680 return a (dd, di) tuple.
2685 - dd: extra bytes for the "data" file
2681 - dd: extra bytes for the "data" file
2686 - di: extra bytes for the "index" file
2682 - di: extra bytes for the "index" file
2687
2683
2688 A healthy revlog will return (0, 0).
2684 A healthy revlog will return (0, 0).
2689 """
2685 """
2690 expected = 0
2686 expected = 0
2691 if len(self):
2687 if len(self):
2692 expected = max(0, self.end(len(self) - 1))
2688 expected = max(0, self.end(len(self) - 1))
2693
2689
2694 try:
2690 try:
2695 with self._datafp() as f:
2691 with self._datafp() as f:
2696 f.seek(0, io.SEEK_END)
2692 f.seek(0, io.SEEK_END)
2697 actual = f.tell()
2693 actual = f.tell()
2698 dd = actual - expected
2694 dd = actual - expected
2699 except IOError as inst:
2695 except IOError as inst:
2700 if inst.errno != errno.ENOENT:
2696 if inst.errno != errno.ENOENT:
2701 raise
2697 raise
2702 dd = 0
2698 dd = 0
2703
2699
2704 try:
2700 try:
2705 f = self.opener(self.indexfile)
2701 f = self.opener(self.indexfile)
2706 f.seek(0, io.SEEK_END)
2702 f.seek(0, io.SEEK_END)
2707 actual = f.tell()
2703 actual = f.tell()
2708 f.close()
2704 f.close()
2709 s = self._io.size
2705 s = self._io.size
2710 i = max(0, actual // s)
2706 i = max(0, actual // s)
2711 di = actual - (i * s)
2707 di = actual - (i * s)
2712 if self._inline:
2708 if self._inline:
2713 databytes = 0
2709 databytes = 0
2714 for r in self:
2710 for r in self:
2715 databytes += max(0, self.length(r))
2711 databytes += max(0, self.length(r))
2716 dd = 0
2712 dd = 0
2717 di = actual - len(self) * s - databytes
2713 di = actual - len(self) * s - databytes
2718 except IOError as inst:
2714 except IOError as inst:
2719 if inst.errno != errno.ENOENT:
2715 if inst.errno != errno.ENOENT:
2720 raise
2716 raise
2721 di = 0
2717 di = 0
2722
2718
2723 return (dd, di)
2719 return (dd, di)
2724
2720
2725 def files(self):
2721 def files(self):
2726 res = [self.indexfile]
2722 res = [self.indexfile]
2727 if not self._inline:
2723 if not self._inline:
2728 res.append(self.datafile)
2724 res.append(self.datafile)
2729 return res
2725 return res
2730
2726
2731 def emitrevisions(
2727 def emitrevisions(
2732 self,
2728 self,
2733 nodes,
2729 nodes,
2734 nodesorder=None,
2730 nodesorder=None,
2735 revisiondata=False,
2731 revisiondata=False,
2736 assumehaveparentrevisions=False,
2732 assumehaveparentrevisions=False,
2737 deltamode=repository.CG_DELTAMODE_STD,
2733 deltamode=repository.CG_DELTAMODE_STD,
2738 sidedata_helpers=None,
2734 sidedata_helpers=None,
2739 ):
2735 ):
2740 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2736 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2741 raise error.ProgrammingError(
2737 raise error.ProgrammingError(
2742 b'unhandled value for nodesorder: %s' % nodesorder
2738 b'unhandled value for nodesorder: %s' % nodesorder
2743 )
2739 )
2744
2740
2745 if nodesorder is None and not self._generaldelta:
2741 if nodesorder is None and not self._generaldelta:
2746 nodesorder = b'storage'
2742 nodesorder = b'storage'
2747
2743
2748 if (
2744 if (
2749 not self._storedeltachains
2745 not self._storedeltachains
2750 and deltamode != repository.CG_DELTAMODE_PREV
2746 and deltamode != repository.CG_DELTAMODE_PREV
2751 ):
2747 ):
2752 deltamode = repository.CG_DELTAMODE_FULL
2748 deltamode = repository.CG_DELTAMODE_FULL
2753
2749
2754 return storageutil.emitrevisions(
2750 return storageutil.emitrevisions(
2755 self,
2751 self,
2756 nodes,
2752 nodes,
2757 nodesorder,
2753 nodesorder,
2758 revlogrevisiondelta,
2754 revlogrevisiondelta,
2759 deltaparentfn=self.deltaparent,
2755 deltaparentfn=self.deltaparent,
2760 candeltafn=self.candelta,
2756 candeltafn=self.candelta,
2761 rawsizefn=self.rawsize,
2757 rawsizefn=self.rawsize,
2762 revdifffn=self.revdiff,
2758 revdifffn=self.revdiff,
2763 flagsfn=self.flags,
2759 flagsfn=self.flags,
2764 deltamode=deltamode,
2760 deltamode=deltamode,
2765 revisiondata=revisiondata,
2761 revisiondata=revisiondata,
2766 assumehaveparentrevisions=assumehaveparentrevisions,
2762 assumehaveparentrevisions=assumehaveparentrevisions,
2767 sidedata_helpers=sidedata_helpers,
2763 sidedata_helpers=sidedata_helpers,
2768 )
2764 )
2769
2765
2770 DELTAREUSEALWAYS = b'always'
2766 DELTAREUSEALWAYS = b'always'
2771 DELTAREUSESAMEREVS = b'samerevs'
2767 DELTAREUSESAMEREVS = b'samerevs'
2772 DELTAREUSENEVER = b'never'
2768 DELTAREUSENEVER = b'never'
2773
2769
2774 DELTAREUSEFULLADD = b'fulladd'
2770 DELTAREUSEFULLADD = b'fulladd'
2775
2771
2776 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2772 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2777
2773
2778 def clone(
2774 def clone(
2779 self,
2775 self,
2780 tr,
2776 tr,
2781 destrevlog,
2777 destrevlog,
2782 addrevisioncb=None,
2778 addrevisioncb=None,
2783 deltareuse=DELTAREUSESAMEREVS,
2779 deltareuse=DELTAREUSESAMEREVS,
2784 forcedeltabothparents=None,
2780 forcedeltabothparents=None,
2785 sidedatacompanion=None,
2781 sidedatacompanion=None,
2786 ):
2782 ):
2787 """Copy this revlog to another, possibly with format changes.
2783 """Copy this revlog to another, possibly with format changes.
2788
2784
2789 The destination revlog will contain the same revisions and nodes.
2785 The destination revlog will contain the same revisions and nodes.
2790 However, it may not be bit-for-bit identical due to e.g. delta encoding
2786 However, it may not be bit-for-bit identical due to e.g. delta encoding
2791 differences.
2787 differences.
2792
2788
2793 The ``deltareuse`` argument controls how deltas from the existing revlog
2789 The ``deltareuse`` argument controls how deltas from the existing revlog
2794 are preserved in the destination revlog. The argument can have the
2790 are preserved in the destination revlog. The argument can have the
2795 following values:
2791 following values:
2796
2792
2797 DELTAREUSEALWAYS
2793 DELTAREUSEALWAYS
2798 Deltas will always be reused (if possible), even if the destination
2794 Deltas will always be reused (if possible), even if the destination
2799 revlog would not select the same revisions for the delta. This is the
2795 revlog would not select the same revisions for the delta. This is the
2800 fastest mode of operation.
2796 fastest mode of operation.
2801 DELTAREUSESAMEREVS
2797 DELTAREUSESAMEREVS
2802 Deltas will be reused if the destination revlog would pick the same
2798 Deltas will be reused if the destination revlog would pick the same
2803 revisions for the delta. This mode strikes a balance between speed
2799 revisions for the delta. This mode strikes a balance between speed
2804 and optimization.
2800 and optimization.
2805 DELTAREUSENEVER
2801 DELTAREUSENEVER
2806 Deltas will never be reused. This is the slowest mode of execution.
2802 Deltas will never be reused. This is the slowest mode of execution.
2807 This mode can be used to recompute deltas (e.g. if the diff/delta
2803 This mode can be used to recompute deltas (e.g. if the diff/delta
2808 algorithm changes).
2804 algorithm changes).
2809 DELTAREUSEFULLADD
2805 DELTAREUSEFULLADD
2810 Revisions will be re-added as if they were new content. This is
2806 Revisions will be re-added as if they were new content. This is
2811 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2807 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2812 e.g. large file detection and handling.
2808 e.g. large file detection and handling.
2813
2809
2814 Delta computation can be slow, so the choice of delta reuse policy can
2810 Delta computation can be slow, so the choice of delta reuse policy can
2815 significantly affect run time.
2811 significantly affect run time.
2816
2812
2817 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2813 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2818 two extremes. Deltas will be reused if they are appropriate. But if the
2814 two extremes. Deltas will be reused if they are appropriate. But if the
2819 delta could choose a better revision, it will do so. This means if you
2815 delta could choose a better revision, it will do so. This means if you
2820 are converting a non-generaldelta revlog to a generaldelta revlog,
2816 are converting a non-generaldelta revlog to a generaldelta revlog,
2821 deltas will be recomputed if the delta's parent isn't a parent of the
2817 deltas will be recomputed if the delta's parent isn't a parent of the
2822 revision.
2818 revision.
2823
2819
2824 In addition to the delta policy, the ``forcedeltabothparents``
2820 In addition to the delta policy, the ``forcedeltabothparents``
2825 argument controls whether to force computing deltas against both parents
2821 argument controls whether to force computing deltas against both parents
2826 for merges. When unset, the destination revlog's existing setting is kept.
2822 for merges. When unset, the destination revlog's existing setting is kept.
2827
2823
2828 If not None, `sidedatacompanion` is a callable that accepts two
2824 If not None, `sidedatacompanion` is a callable that accepts two
2829 arguments:
2825 arguments:
2830
2826
2831 (srcrevlog, rev)
2827 (srcrevlog, rev)
2832
2828
2833 and returns a 5-tuple that controls changes to sidedata content from the
2829 and returns a 5-tuple that controls changes to sidedata content from the
2834 old revision to the new clone result:
2830 old revision to the new clone result:
2835
2831
2836 (dropall, filterout, update, new_flags, dropped_flags)
2832 (dropall, filterout, update, new_flags, dropped_flags)
2837
2833
2838 * if `dropall` is True, all sidedata should be dropped
2834 * if `dropall` is True, all sidedata should be dropped
2839 * `filterout` is a set of sidedata keys that should be dropped
2835 * `filterout` is a set of sidedata keys that should be dropped
2840 * `update` is a mapping of additional/new key -> value
2836 * `update` is a mapping of additional/new key -> value
2841 * new_flags is a bitfield of new flags that the revision should get
2837 * new_flags is a bitfield of new flags that the revision should get
2842 * dropped_flags is a bitfield of flags that the revision should no longer have
2838 * dropped_flags is a bitfield of flags that the revision should no longer have
2843 """
2839 """
2844 if deltareuse not in self.DELTAREUSEALL:
2840 if deltareuse not in self.DELTAREUSEALL:
2845 raise ValueError(
2841 raise ValueError(
2846 _(b'value for deltareuse invalid: %s') % deltareuse
2842 _(b'value for deltareuse invalid: %s') % deltareuse
2847 )
2843 )
2848
2844
2849 if len(destrevlog):
2845 if len(destrevlog):
2850 raise ValueError(_(b'destination revlog is not empty'))
2846 raise ValueError(_(b'destination revlog is not empty'))
2851
2847
2852 if getattr(self, 'filteredrevs', None):
2848 if getattr(self, 'filteredrevs', None):
2853 raise ValueError(_(b'source revlog has filtered revisions'))
2849 raise ValueError(_(b'source revlog has filtered revisions'))
2854 if getattr(destrevlog, 'filteredrevs', None):
2850 if getattr(destrevlog, 'filteredrevs', None):
2855 raise ValueError(_(b'destination revlog has filtered revisions'))
2851 raise ValueError(_(b'destination revlog has filtered revisions'))
2856
2852
2857 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2853 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2858 # if possible.
2854 # if possible.
2859 oldlazydelta = destrevlog._lazydelta
2855 oldlazydelta = destrevlog._lazydelta
2860 oldlazydeltabase = destrevlog._lazydeltabase
2856 oldlazydeltabase = destrevlog._lazydeltabase
2861 oldamd = destrevlog._deltabothparents
2857 oldamd = destrevlog._deltabothparents
2862
2858
2863 try:
2859 try:
2864 if deltareuse == self.DELTAREUSEALWAYS:
2860 if deltareuse == self.DELTAREUSEALWAYS:
2865 destrevlog._lazydeltabase = True
2861 destrevlog._lazydeltabase = True
2866 destrevlog._lazydelta = True
2862 destrevlog._lazydelta = True
2867 elif deltareuse == self.DELTAREUSESAMEREVS:
2863 elif deltareuse == self.DELTAREUSESAMEREVS:
2868 destrevlog._lazydeltabase = False
2864 destrevlog._lazydeltabase = False
2869 destrevlog._lazydelta = True
2865 destrevlog._lazydelta = True
2870 elif deltareuse == self.DELTAREUSENEVER:
2866 elif deltareuse == self.DELTAREUSENEVER:
2871 destrevlog._lazydeltabase = False
2867 destrevlog._lazydeltabase = False
2872 destrevlog._lazydelta = False
2868 destrevlog._lazydelta = False
2873
2869
2874 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2870 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2875
2871
2876 self._clone(
2872 self._clone(
2877 tr,
2873 tr,
2878 destrevlog,
2874 destrevlog,
2879 addrevisioncb,
2875 addrevisioncb,
2880 deltareuse,
2876 deltareuse,
2881 forcedeltabothparents,
2877 forcedeltabothparents,
2882 sidedatacompanion,
2878 sidedatacompanion,
2883 )
2879 )
2884
2880
2885 finally:
2881 finally:
2886 destrevlog._lazydelta = oldlazydelta
2882 destrevlog._lazydelta = oldlazydelta
2887 destrevlog._lazydeltabase = oldlazydeltabase
2883 destrevlog._lazydeltabase = oldlazydeltabase
2888 destrevlog._deltabothparents = oldamd
2884 destrevlog._deltabothparents = oldamd
2889
2885
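(Editorial note: a rough usage sketch of the ``deltareuse`` policies documented in the ``clone`` docstring above. ``repo``, ``src_revlog`` and ``dest_revlog`` are assumed to already exist; this is illustrative and not code from this changeset.)

# Rewrite a revlog into an empty destination, reusing deltas only when the
# destination would have picked the same delta base (the default policy).
with repo.transaction(b'rewrite-revlog') as tr:
    src_revlog.clone(
        tr,
        dest_revlog,
        deltareuse=src_revlog.DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
    )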
2890 def _clone(
2886 def _clone(
2891 self,
2887 self,
2892 tr,
2888 tr,
2893 destrevlog,
2889 destrevlog,
2894 addrevisioncb,
2890 addrevisioncb,
2895 deltareuse,
2891 deltareuse,
2896 forcedeltabothparents,
2892 forcedeltabothparents,
2897 sidedatacompanion,
2893 sidedatacompanion,
2898 ):
2894 ):
2899 """perform the core duty of `revlog.clone` after parameter processing"""
2895 """perform the core duty of `revlog.clone` after parameter processing"""
2900 deltacomputer = deltautil.deltacomputer(destrevlog)
2896 deltacomputer = deltautil.deltacomputer(destrevlog)
2901 index = self.index
2897 index = self.index
2902 for rev in self:
2898 for rev in self:
2903 entry = index[rev]
2899 entry = index[rev]
2904
2900
2905 # Some classes override linkrev to take filtered revs into
2901 # Some classes override linkrev to take filtered revs into
2906 # account. Use raw entry from index.
2902 # account. Use raw entry from index.
2907 flags = entry[0] & 0xFFFF
2903 flags = entry[0] & 0xFFFF
2908 linkrev = entry[4]
2904 linkrev = entry[4]
2909 p1 = index[entry[5]][7]
2905 p1 = index[entry[5]][7]
2910 p2 = index[entry[6]][7]
2906 p2 = index[entry[6]][7]
2911 node = entry[7]
2907 node = entry[7]
2912
2908
2913 sidedataactions = (False, [], {}, 0, 0)
2909 sidedataactions = (False, [], {}, 0, 0)
2914 if sidedatacompanion is not None:
2910 if sidedatacompanion is not None:
2915 sidedataactions = sidedatacompanion(self, rev)
2911 sidedataactions = sidedatacompanion(self, rev)
2916
2912
2917 # (Possibly) reuse the delta from the revlog if allowed and
2913 # (Possibly) reuse the delta from the revlog if allowed and
2918 # the revlog chunk is a delta.
2914 # the revlog chunk is a delta.
2919 cachedelta = None
2915 cachedelta = None
2920 rawtext = None
2916 rawtext = None
2921 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2917 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2922 dropall = sidedataactions[0]
2918 dropall = sidedataactions[0]
2923 filterout = sidedataactions[1]
2919 filterout = sidedataactions[1]
2924 update = sidedataactions[2]
2920 update = sidedataactions[2]
2925 new_flags = sidedataactions[3]
2921 new_flags = sidedataactions[3]
2926 dropped_flags = sidedataactions[4]
2922 dropped_flags = sidedataactions[4]
2927 text, sidedata = self._revisiondata(rev)
2923 text, sidedata = self._revisiondata(rev)
2928 if dropall:
2924 if dropall:
2929 sidedata = {}
2925 sidedata = {}
2930 for key in filterout:
2926 for key in filterout:
2931 sidedata.pop(key, None)
2927 sidedata.pop(key, None)
2932 sidedata.update(update)
2928 sidedata.update(update)
2933 if not sidedata:
2929 if not sidedata:
2934 sidedata = None
2930 sidedata = None
2935
2931
2936 flags |= new_flags
2932 flags |= new_flags
2937 flags &= ~dropped_flags
2933 flags &= ~dropped_flags
2938
2934
2939 destrevlog.addrevision(
2935 destrevlog.addrevision(
2940 text,
2936 text,
2941 tr,
2937 tr,
2942 linkrev,
2938 linkrev,
2943 p1,
2939 p1,
2944 p2,
2940 p2,
2945 cachedelta=cachedelta,
2941 cachedelta=cachedelta,
2946 node=node,
2942 node=node,
2947 flags=flags,
2943 flags=flags,
2948 deltacomputer=deltacomputer,
2944 deltacomputer=deltacomputer,
2949 sidedata=sidedata,
2945 sidedata=sidedata,
2950 )
2946 )
2951 else:
2947 else:
2952 if destrevlog._lazydelta:
2948 if destrevlog._lazydelta:
2953 dp = self.deltaparent(rev)
2949 dp = self.deltaparent(rev)
2954 if dp != nullrev:
2950 if dp != nullrev:
2955 cachedelta = (dp, bytes(self._chunk(rev)))
2951 cachedelta = (dp, bytes(self._chunk(rev)))
2956
2952
2957 if not cachedelta:
2953 if not cachedelta:
2958 rawtext = self.rawdata(rev)
2954 rawtext = self.rawdata(rev)
2959
2955
2960 ifh = destrevlog.opener(
2956 ifh = destrevlog.opener(
2961 destrevlog.indexfile, b'a+', checkambig=False
2957 destrevlog.indexfile, b'a+', checkambig=False
2962 )
2958 )
2963 dfh = None
2959 dfh = None
2964 if not destrevlog._inline:
2960 if not destrevlog._inline:
2965 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2961 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2966 try:
2962 try:
2967 destrevlog._addrevision(
2963 destrevlog._addrevision(
2968 node,
2964 node,
2969 rawtext,
2965 rawtext,
2970 tr,
2966 tr,
2971 linkrev,
2967 linkrev,
2972 p1,
2968 p1,
2973 p2,
2969 p2,
2974 flags,
2970 flags,
2975 cachedelta,
2971 cachedelta,
2976 ifh,
2972 ifh,
2977 dfh,
2973 dfh,
2978 deltacomputer=deltacomputer,
2974 deltacomputer=deltacomputer,
2979 )
2975 )
2980 finally:
2976 finally:
2981 if dfh:
2977 if dfh:
2982 dfh.close()
2978 dfh.close()
2983 ifh.close()
2979 ifh.close()
2984
2980
2985 if addrevisioncb:
2981 if addrevisioncb:
2986 addrevisioncb(self, rev, node)
2982 addrevisioncb(self, rev, node)
2987
2983
2988 def censorrevision(self, tr, censornode, tombstone=b''):
2984 def censorrevision(self, tr, censornode, tombstone=b''):
2989 if (self.version & 0xFFFF) == REVLOGV0:
2985 if (self.version & 0xFFFF) == REVLOGV0:
2990 raise error.RevlogError(
2986 raise error.RevlogError(
2991 _(b'cannot censor with version %d revlogs') % self.version
2987 _(b'cannot censor with version %d revlogs') % self.version
2992 )
2988 )
2993
2989
2994 censorrev = self.rev(censornode)
2990 censorrev = self.rev(censornode)
2995 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2991 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2996
2992
2997 if len(tombstone) > self.rawsize(censorrev):
2993 if len(tombstone) > self.rawsize(censorrev):
2998 raise error.Abort(
2994 raise error.Abort(
2999 _(b'censor tombstone must be no longer than censored data')
2995 _(b'censor tombstone must be no longer than censored data')
3000 )
2996 )
3001
2997
3002 # Rewriting the revlog in place is hard. Our strategy for censoring is
2998 # Rewriting the revlog in place is hard. Our strategy for censoring is
3003 # to create a new revlog, copy all revisions to it, then replace the
2999 # to create a new revlog, copy all revisions to it, then replace the
3004 # revlogs on transaction close.
3000 # revlogs on transaction close.
3005
3001
3006 newindexfile = self.indexfile + b'.tmpcensored'
3002 newindexfile = self.indexfile + b'.tmpcensored'
3007 newdatafile = self.datafile + b'.tmpcensored'
3003 newdatafile = self.datafile + b'.tmpcensored'
3008
3004
3009 # This is a bit dangerous. We could easily have a mismatch of state.
3005 # This is a bit dangerous. We could easily have a mismatch of state.
3010 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3006 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3011 newrl.version = self.version
3007 newrl.version = self.version
3012 newrl._generaldelta = self._generaldelta
3008 newrl._generaldelta = self._generaldelta
3013 newrl._io = self._io
3009 newrl._io = self._io
3014
3010
3015 for rev in self.revs():
3011 for rev in self.revs():
3016 node = self.node(rev)
3012 node = self.node(rev)
3017 p1, p2 = self.parents(node)
3013 p1, p2 = self.parents(node)
3018
3014
3019 if rev == censorrev:
3015 if rev == censorrev:
3020 newrl.addrawrevision(
3016 newrl.addrawrevision(
3021 tombstone,
3017 tombstone,
3022 tr,
3018 tr,
3023 self.linkrev(censorrev),
3019 self.linkrev(censorrev),
3024 p1,
3020 p1,
3025 p2,
3021 p2,
3026 censornode,
3022 censornode,
3027 REVIDX_ISCENSORED,
3023 REVIDX_ISCENSORED,
3028 )
3024 )
3029
3025
3030 if newrl.deltaparent(rev) != nullrev:
3026 if newrl.deltaparent(rev) != nullrev:
3031 raise error.Abort(
3027 raise error.Abort(
3032 _(
3028 _(
3033 b'censored revision stored as delta; '
3029 b'censored revision stored as delta; '
3034 b'cannot censor'
3030 b'cannot censor'
3035 ),
3031 ),
3036 hint=_(
3032 hint=_(
3037 b'censoring of revlogs is not '
3033 b'censoring of revlogs is not '
3038 b'fully implemented; please report '
3034 b'fully implemented; please report '
3039 b'this bug'
3035 b'this bug'
3040 ),
3036 ),
3041 )
3037 )
3042 continue
3038 continue
3043
3039
3044 if self.iscensored(rev):
3040 if self.iscensored(rev):
3045 if self.deltaparent(rev) != nullrev:
3041 if self.deltaparent(rev) != nullrev:
3046 raise error.Abort(
3042 raise error.Abort(
3047 _(
3043 _(
3048 b'cannot censor due to censored '
3044 b'cannot censor due to censored '
3049 b'revision having delta stored'
3045 b'revision having delta stored'
3050 )
3046 )
3051 )
3047 )
3052 rawtext = self._chunk(rev)
3048 rawtext = self._chunk(rev)
3053 else:
3049 else:
3054 rawtext = self.rawdata(rev)
3050 rawtext = self.rawdata(rev)
3055
3051
3056 newrl.addrawrevision(
3052 newrl.addrawrevision(
3057 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3053 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3058 )
3054 )
3059
3055
3060 tr.addbackup(self.indexfile, location=b'store')
3056 tr.addbackup(self.indexfile, location=b'store')
3061 if not self._inline:
3057 if not self._inline:
3062 tr.addbackup(self.datafile, location=b'store')
3058 tr.addbackup(self.datafile, location=b'store')
3063
3059
3064 self.opener.rename(newrl.indexfile, self.indexfile)
3060 self.opener.rename(newrl.indexfile, self.indexfile)
3065 if not self._inline:
3061 if not self._inline:
3066 self.opener.rename(newrl.datafile, self.datafile)
3062 self.opener.rename(newrl.datafile, self.datafile)
3067
3063
3068 self.clearcaches()
3064 self.clearcaches()
3069 self._loadindex()
3065 self._loadindex()
3070
3066
3071 def verifyintegrity(self, state):
3067 def verifyintegrity(self, state):
3072 """Verifies the integrity of the revlog.
3068 """Verifies the integrity of the revlog.
3073
3069
3074 Yields ``revlogproblem`` instances describing problems that are
3070 Yields ``revlogproblem`` instances describing problems that are
3075 found.
3071 found.
3076 """
3072 """
3077 dd, di = self.checksize()
3073 dd, di = self.checksize()
3078 if dd:
3074 if dd:
3079 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3075 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3080 if di:
3076 if di:
3081 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3077 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3082
3078
3083 version = self.version & 0xFFFF
3079 version = self.version & 0xFFFF
3084
3080
3085 # The verifier tells us what version revlog we should be.
3081 # The verifier tells us what version revlog we should be.
3086 if version != state[b'expectedversion']:
3082 if version != state[b'expectedversion']:
3087 yield revlogproblem(
3083 yield revlogproblem(
3088 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3084 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3089 % (self.indexfile, version, state[b'expectedversion'])
3085 % (self.indexfile, version, state[b'expectedversion'])
3090 )
3086 )
3091
3087
3092 state[b'skipread'] = set()
3088 state[b'skipread'] = set()
3093 state[b'safe_renamed'] = set()
3089 state[b'safe_renamed'] = set()
3094
3090
3095 for rev in self:
3091 for rev in self:
3096 node = self.node(rev)
3092 node = self.node(rev)
3097
3093
3098 # Verify contents. 4 cases to care about:
3094 # Verify contents. 4 cases to care about:
3099 #
3095 #
3100 # common: the most common case
3096 # common: the most common case
3101 # rename: with a rename
3097 # rename: with a rename
3102 # meta: file content starts with b'\1\n', the metadata
3098 # meta: file content starts with b'\1\n', the metadata
3103 # header defined in filelog.py, but without a rename
3099 # header defined in filelog.py, but without a rename
3104 # ext: content stored externally
3100 # ext: content stored externally
3105 #
3101 #
3106 # More formally, their differences are shown below:
3102 # More formally, their differences are shown below:
3107 #
3103 #
3108 # | common | rename | meta | ext
3104 # | common | rename | meta | ext
3109 # -------------------------------------------------------
3105 # -------------------------------------------------------
3110 # flags() | 0 | 0 | 0 | not 0
3106 # flags() | 0 | 0 | 0 | not 0
3111 # renamed() | False | True | False | ?
3107 # renamed() | False | True | False | ?
3112 # rawtext[0:2]=='\1\n'| False | True | True | ?
3108 # rawtext[0:2]=='\1\n'| False | True | True | ?
3113 #
3109 #
3114 # "rawtext" means the raw text stored in revlog data, which
3110 # "rawtext" means the raw text stored in revlog data, which
3115 # could be retrieved by "rawdata(rev)". "text"
3111 # could be retrieved by "rawdata(rev)". "text"
3116 # mentioned below is "revision(rev)".
3112 # mentioned below is "revision(rev)".
3117 #
3113 #
3118 # There are 3 different lengths stored physically:
3114 # There are 3 different lengths stored physically:
3119 # 1. L1: rawsize, stored in revlog index
3115 # 1. L1: rawsize, stored in revlog index
3120 # 2. L2: len(rawtext), stored in revlog data
3116 # 2. L2: len(rawtext), stored in revlog data
3121 # 3. L3: len(text), stored in revlog data if flags==0, or
3117 # 3. L3: len(text), stored in revlog data if flags==0, or
3122 # possibly somewhere else if flags!=0
3118 # possibly somewhere else if flags!=0
3123 #
3119 #
3124 # L1 should be equal to L2. L3 could be different from them.
3120 # L1 should be equal to L2. L3 could be different from them.
3125 # "text" may or may not affect commit hash depending on flag
3121 # "text" may or may not affect commit hash depending on flag
3126 # processors (see flagutil.addflagprocessor).
3122 # processors (see flagutil.addflagprocessor).
3127 #
3123 #
3128 # | common | rename | meta | ext
3124 # | common | rename | meta | ext
3129 # -------------------------------------------------
3125 # -------------------------------------------------
3130 # rawsize() | L1 | L1 | L1 | L1
3126 # rawsize() | L1 | L1 | L1 | L1
3131 # size() | L1 | L2-LM | L1(*) | L1 (?)
3127 # size() | L1 | L2-LM | L1(*) | L1 (?)
3132 # len(rawtext) | L2 | L2 | L2 | L2
3128 # len(rawtext) | L2 | L2 | L2 | L2
3133 # len(text) | L2 | L2 | L2 | L3
3129 # len(text) | L2 | L2 | L2 | L3
3134 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3130 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3135 #
3131 #
3136 # LM: length of metadata, depending on rawtext
3132 # LM: length of metadata, depending on rawtext
3137 # (*): not ideal, see comment in filelog.size
3133 # (*): not ideal, see comment in filelog.size
3138 # (?): could be "- len(meta)" if the resolved content has
3134 # (?): could be "- len(meta)" if the resolved content has
3139 # rename metadata
3135 # rename metadata
3140 #
3136 #
3141 # Checks needed to be done:
3137 # Checks needed to be done:
3142 # 1. length check: L1 == L2, in all cases.
3138 # 1. length check: L1 == L2, in all cases.
3143 # 2. hash check: depending on flag processor, we may need to
3139 # 2. hash check: depending on flag processor, we may need to
3144 # use either "text" (external), or "rawtext" (in revlog).
3140 # use either "text" (external), or "rawtext" (in revlog).
3145
3141
3146 try:
3142 try:
3147 skipflags = state.get(b'skipflags', 0)
3143 skipflags = state.get(b'skipflags', 0)
3148 if skipflags:
3144 if skipflags:
3149 skipflags &= self.flags(rev)
3145 skipflags &= self.flags(rev)
3150
3146
3151 _verify_revision(self, skipflags, state, node)
3147 _verify_revision(self, skipflags, state, node)
3152
3148
3153 l1 = self.rawsize(rev)
3149 l1 = self.rawsize(rev)
3154 l2 = len(self.rawdata(node))
3150 l2 = len(self.rawdata(node))
3155
3151
3156 if l1 != l2:
3152 if l1 != l2:
3157 yield revlogproblem(
3153 yield revlogproblem(
3158 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3154 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3159 node=node,
3155 node=node,
3160 )
3156 )
3161
3157
3162 except error.CensoredNodeError:
3158 except error.CensoredNodeError:
3163 if state[b'erroroncensored']:
3159 if state[b'erroroncensored']:
3164 yield revlogproblem(
3160 yield revlogproblem(
3165 error=_(b'censored file data'), node=node
3161 error=_(b'censored file data'), node=node
3166 )
3162 )
3167 state[b'skipread'].add(node)
3163 state[b'skipread'].add(node)
3168 except Exception as e:
3164 except Exception as e:
3169 yield revlogproblem(
3165 yield revlogproblem(
3170 error=_(b'unpacking %s: %s')
3166 error=_(b'unpacking %s: %s')
3171 % (short(node), stringutil.forcebytestr(e)),
3167 % (short(node), stringutil.forcebytestr(e)),
3172 node=node,
3168 node=node,
3173 )
3169 )
3174 state[b'skipread'].add(node)
3170 state[b'skipread'].add(node)
3175
3171
3176 def storageinfo(
3172 def storageinfo(
3177 self,
3173 self,
3178 exclusivefiles=False,
3174 exclusivefiles=False,
3179 sharedfiles=False,
3175 sharedfiles=False,
3180 revisionscount=False,
3176 revisionscount=False,
3181 trackedsize=False,
3177 trackedsize=False,
3182 storedsize=False,
3178 storedsize=False,
3183 ):
3179 ):
3184 d = {}
3180 d = {}
3185
3181
3186 if exclusivefiles:
3182 if exclusivefiles:
3187 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3183 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3188 if not self._inline:
3184 if not self._inline:
3189 d[b'exclusivefiles'].append((self.opener, self.datafile))
3185 d[b'exclusivefiles'].append((self.opener, self.datafile))
3190
3186
3191 if sharedfiles:
3187 if sharedfiles:
3192 d[b'sharedfiles'] = []
3188 d[b'sharedfiles'] = []
3193
3189
3194 if revisionscount:
3190 if revisionscount:
3195 d[b'revisionscount'] = len(self)
3191 d[b'revisionscount'] = len(self)
3196
3192
3197 if trackedsize:
3193 if trackedsize:
3198 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3194 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3199
3195
3200 if storedsize:
3196 if storedsize:
3201 d[b'storedsize'] = sum(
3197 d[b'storedsize'] = sum(
3202 self.opener.stat(path).st_size for path in self.files()
3198 self.opener.stat(path).st_size for path in self.files()
3203 )
3199 )
3204
3200
3205 return d
3201 return d
3206
3202
3207 def rewrite_sidedata(self, helpers, startrev, endrev):
3203 def rewrite_sidedata(self, helpers, startrev, endrev):
3208 if self.version & 0xFFFF != REVLOGV2:
3204 if self.version & 0xFFFF != REVLOGV2:
3209 return
3205 return
3210 # inline are not yet supported because they suffer from an issue when
3206 # inline are not yet supported because they suffer from an issue when
3211 # rewriting them (since it's not an append-only operation).
3207 # rewriting them (since it's not an append-only operation).
3212 # See issue6485.
3208 # See issue6485.
3213 assert not self._inline
3209 assert not self._inline
3214 if not helpers[1] and not helpers[2]:
3210 if not helpers[1] and not helpers[2]:
3215 # Nothing to generate or remove
3211 # Nothing to generate or remove
3216 return
3212 return
3217
3213
3218 new_entries = []
3214 new_entries = []
3219 # append the new sidedata
3215 # append the new sidedata
3220 with self._datafp(b'a+') as fp:
3216 with self._datafp(b'a+') as fp:
3221 # Maybe this bug still exists, see revlog._writeentry
3217 # Maybe this bug still exists, see revlog._writeentry
3222 fp.seek(0, os.SEEK_END)
3218 fp.seek(0, os.SEEK_END)
3223 current_offset = fp.tell()
3219 current_offset = fp.tell()
3224 for rev in range(startrev, endrev + 1):
3220 for rev in range(startrev, endrev + 1):
3225 entry = self.index[rev]
3221 entry = self.index[rev]
3226 new_sidedata = storageutil.run_sidedata_helpers(
3222 new_sidedata = storageutil.run_sidedata_helpers(
3227 store=self,
3223 store=self,
3228 sidedata_helpers=helpers,
3224 sidedata_helpers=helpers,
3229 sidedata={},
3225 sidedata={},
3230 rev=rev,
3226 rev=rev,
3231 )
3227 )
3232
3228
3233 serialized_sidedata = sidedatautil.serialize_sidedata(
3229 serialized_sidedata = sidedatautil.serialize_sidedata(
3234 new_sidedata
3230 new_sidedata
3235 )
3231 )
3236 if entry[8] != 0 or entry[9] != 0:
3232 if entry[8] != 0 or entry[9] != 0:
3237 # rewriting entries that already have sidedata is not
3233 # rewriting entries that already have sidedata is not
3238 # supported yet, because it introduces garbage data in the
3234 # supported yet, because it introduces garbage data in the
3239 # revlog.
3235 # revlog.
3240 msg = b"Rewriting existing sidedata is not supported yet"
3236 msg = b"Rewriting existing sidedata is not supported yet"
3241 raise error.Abort(msg)
3237 raise error.Abort(msg)
3242 entry = entry[:8]
3238 entry = entry[:8]
3243 entry += (current_offset, len(serialized_sidedata))
3239 entry += (current_offset, len(serialized_sidedata))
3244
3240
3245 fp.write(serialized_sidedata)
3241 fp.write(serialized_sidedata)
3246 new_entries.append(entry)
3242 new_entries.append(entry)
3247 current_offset += len(serialized_sidedata)
3243 current_offset += len(serialized_sidedata)
3248
3244
3249 # rewrite the new index entries
3245 # rewrite the new index entries
3250 with self._indexfp(b'w+') as fp:
3246 with self._indexfp(b'w+') as fp:
3251 fp.seek(startrev * self._io.size)
3247 fp.seek(startrev * self._io.size)
3252 for i, entry in enumerate(new_entries):
3248 for i, entry in enumerate(new_entries):
3253 rev = startrev + i
3249 rev = startrev + i
3254 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3250 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3255 packed = self._io.packentry(entry, self.node, self.version, rev)
3251 packed = self._io.packentry(entry, self.node, self.version, rev)
3256 fp.write(packed)
3252 fp.write(packed)
@@ -1,90 +1,105 b''
1 # revlogdeltas.py - constant used for revlog logic
1 # revlogdeltas.py - constant used for revlog logic
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import struct
12 import struct
13
13
14 from ..interfaces import repository
14 from ..interfaces import repository
15
15
16 ### main revlog header
16 ### main revlog header
17
17
18 ## revlog version
18 ## revlog version
19 REVLOGV0 = 0
19 REVLOGV0 = 0
20 REVLOGV1 = 1
20 REVLOGV1 = 1
21 # Dummy value until file format is finalized.
21 # Dummy value until file format is finalized.
22 REVLOGV2 = 0xDEAD
22 REVLOGV2 = 0xDEAD
23
23
24 ## global revlog header flags
24 ## global revlog header flags
25 # Shared across v1 and v2.
25 # Shared across v1 and v2.
26 FLAG_INLINE_DATA = 1 << 16
26 FLAG_INLINE_DATA = 1 << 16
27 # Only used by v1, implied by v2.
27 # Only used by v1, implied by v2.
28 FLAG_GENERALDELTA = 1 << 17
28 FLAG_GENERALDELTA = 1 << 17
29 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
29 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
30 REVLOG_DEFAULT_FORMAT = REVLOGV1
30 REVLOG_DEFAULT_FORMAT = REVLOGV1
31 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
32 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
32 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
33 REVLOGV2_FLAGS = FLAG_INLINE_DATA
33 REVLOGV2_FLAGS = FLAG_INLINE_DATA
34
34
35 ### individual entry
35 ### individual entry
36
36
37 ## index v0:
37 ## index v0:
38 # 4 bytes: offset
38 # 4 bytes: offset
39 # 4 bytes: compressed length
39 # 4 bytes: compressed length
40 # 4 bytes: base rev
40 # 4 bytes: base rev
41 # 4 bytes: link rev
41 # 4 bytes: link rev
42 # 20 bytes: parent 1 nodeid
42 # 20 bytes: parent 1 nodeid
43 # 20 bytes: parent 2 nodeid
43 # 20 bytes: parent 2 nodeid
44 # 20 bytes: nodeid
44 # 20 bytes: nodeid
45 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
45 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
46
46
47 ## index v1
47 ## index v1
48 # 6 bytes: offset
48 # 6 bytes: offset
49 # 2 bytes: flags
49 # 2 bytes: flags
50 # 4 bytes: compressed length
50 # 4 bytes: compressed length
51 # 4 bytes: uncompressed length
51 # 4 bytes: uncompressed length
52 # 4 bytes: base rev
52 # 4 bytes: base rev
53 # 4 bytes: link rev
53 # 4 bytes: link rev
54 # 4 bytes: parent 1 rev
54 # 4 bytes: parent 1 rev
55 # 4 bytes: parent 2 rev
55 # 4 bytes: parent 2 rev
56 # 32 bytes: nodeid
56 # 32 bytes: nodeid
57 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
57 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
58 assert INDEX_ENTRY_V1.size == 32 * 2
58 assert INDEX_ENTRY_V1.size == 32 * 2
59
59
60 # 6 bytes: offset
61 # 2 bytes: flags
62 # 4 bytes: compressed length
63 # 4 bytes: uncompressed length
64 # 4 bytes: base rev
65 # 4 bytes: link rev
66 # 4 bytes: parent 1 rev
67 # 4 bytes: parent 2 rev
68 # 32 bytes: nodeid
69 # 8 bytes: sidedata offset
70 # 4 bytes: sidedata compressed length
71 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
72 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
73 assert INDEX_ENTRY_V2.size == 32 * 3
74
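(Editorial note: the new INDEX_ENTRY_V2 layout can be exercised directly with the struct module; the field values below are made up purely for illustration and are not taken from the changeset.)

import struct

INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")

entry = INDEX_ENTRY_V2.pack(
    (0 << 16) | 0,       # 6-byte offset and 2-byte flags packed into one uint64
    11,                  # compressed length
    11,                  # uncompressed length
    0,                   # base rev
    0,                   # link rev
    -1,                  # parent 1 rev (nullrev)
    -1,                  # parent 2 rev (nullrev)
    b"\x00" * 20,        # nodeid (padded to 32 bytes by the 12x in the format)
    0,                   # sidedata offset
    0,                   # sidedata compressed length
)
assert len(entry) == 96  # 32 * 3, matching the assert above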
60 # revlog index flags
75 # revlog index flags
61
76
62 # For historical reasons, revlog's internal flags were exposed via the
77 # For historical reasons, revlog's internal flags were exposed via the
63 # wire protocol and are even exposed in parts of the storage APIs.
78 # wire protocol and are even exposed in parts of the storage APIs.
64
79
65 # revision has censor metadata, must be verified
80 # revision has censor metadata, must be verified
66 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
81 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
67 # revision hash does not match data (narrowhg)
82 # revision hash does not match data (narrowhg)
68 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
83 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
69 # revision data is stored externally
84 # revision data is stored externally
70 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
85 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
71 # revision data contains extra metadata not part of the official digest
86 # revision data contains extra metadata not part of the official digest
72 REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA
87 REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA
73 # revision changes files in a way that could affect copy tracing.
88 # revision changes files in a way that could affect copy tracing.
74 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
89 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
75 REVIDX_DEFAULT_FLAGS = 0
90 REVIDX_DEFAULT_FLAGS = 0
76 # stable order in which flags need to be processed and their processors applied
91 # stable order in which flags need to be processed and their processors applied
77 REVIDX_FLAGS_ORDER = [
92 REVIDX_FLAGS_ORDER = [
78 REVIDX_ISCENSORED,
93 REVIDX_ISCENSORED,
79 REVIDX_ELLIPSIS,
94 REVIDX_ELLIPSIS,
80 REVIDX_EXTSTORED,
95 REVIDX_EXTSTORED,
81 REVIDX_SIDEDATA,
96 REVIDX_SIDEDATA,
82 REVIDX_HASCOPIESINFO,
97 REVIDX_HASCOPIESINFO,
83 ]
98 ]
84
99
85 # bitmask for flags that could cause rawdata content change
100 # bitmask for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = (
101 REVIDX_RAWTEXT_CHANGING_FLAGS = (
87 REVIDX_ISCENSORED | REVIDX_EXTSTORED | REVIDX_SIDEDATA
102 REVIDX_ISCENSORED | REVIDX_EXTSTORED | REVIDX_SIDEDATA
88 )
103 )
89
104
90 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
105 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000