revlog: move the details of revlog "v1" index inside revlog.utils.constants...
marmoute
r47616:cc65cea9 default
@@ -1,364 +1,365 @@ mercurial/pure/parsers.py
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from ..node import nullid, nullrev
13 from ..node import nullid, nullrev
14 from .. import (
14 from .. import (
15 pycompat,
15 pycompat,
16 util,
16 util,
17 )
17 )
18
18
19 from ..revlogutils import nodemap as nodemaputil
19 from ..revlogutils import nodemap as nodemaputil
20 from ..revlogutils import constants as revlog_constants
20
21
21 stringio = pycompat.bytesio
22 stringio = pycompat.bytesio
22
23
23
24
24 _pack = struct.pack
25 _pack = struct.pack
25 _unpack = struct.unpack
26 _unpack = struct.unpack
26 _compress = zlib.compress
27 _compress = zlib.compress
27 _decompress = zlib.decompress
28 _decompress = zlib.decompress
28
29
29 # Some code below makes tuples directly because it's more convenient. However,
30 # Some code below makes tuples directly because it's more convenient. However,
30 # code outside this module should always use dirstatetuple.
31 # code outside this module should always use dirstatetuple.
31 def dirstatetuple(*x):
32 def dirstatetuple(*x):
32 # x is a tuple
33 # x is a tuple
33 return x
34 return x
34
35
35
36
36 def gettype(q):
37 def gettype(q):
37 return int(q & 0xFFFF)
38 return int(q & 0xFFFF)
38
39
39
40
40 def offset_type(offset, type):
41 def offset_type(offset, type):
41 return int(int(offset) << 16 | type)
42 return int(int(offset) << 16 | type)
42
43
43
44
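
The two helpers above pack a byte offset and a 16-bit flag field into a single integer: the offset lives in the high bits, the flags in the low 16 bits. A standalone round-trip sketch (illustrative values, not part of the patch):

packed = offset_type(1024, 0x0002)  # (1024 << 16) | 0x0002
assert packed == 0x04000002
assert gettype(packed) == 0x0002    # low 16 bits: per-revision flags
assert packed >> 16 == 1024         # high bits: byte offset
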
44 class BaseIndexObject(object):
45 class BaseIndexObject(object):
45 # Format of an index entry according to Python's `struct` language
46 # Format of an index entry according to Python's `struct` language
46 index_format = b">Qiiiiii20s12x"
47 index_format = revlog_constants.INDEX_ENTRY_V1.format
47 # Size of a C unsigned long long int, platform independent
48 # Size of a C unsigned long long int, platform independent
48 big_int_size = struct.calcsize(b'>Q')
49 big_int_size = struct.calcsize(b'>Q')
49 # Size of a C long int, platform independent
50 # Size of a C long int, platform independent
50 int_size = struct.calcsize(b'>i')
51 int_size = struct.calcsize(b'>i')
51 # Size of the entire index format
52 # Size of the entire index format
52 index_size = struct.calcsize(index_format)
53 index_size = revlog_constants.INDEX_ENTRY_V1.size
53 # An empty index entry, used as a default value to be overridden, or nullrev
54 # An empty index entry, used as a default value to be overridden, or nullrev
54 null_item = (0, 0, 0, -1, -1, -1, -1, nullid)
55 null_item = (0, 0, 0, -1, -1, -1, -1, nullid)
55
56
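
The v1 `index_format` string `>Qiiiiii20s12x` describes one 64-byte entry: a big-endian 64-bit offset/flags word, six 32-bit integers (compressed length, uncompressed length, delta base rev, link rev, parent 1 rev, parent 2 rev), a 20-byte nodeid, and 12 bytes of padding. A self-contained sketch that packs and unpacks one entry (all field values here are fabricated for illustration):

import struct

INDEX_FORMAT_V1 = b">Qiiiiii20s12x"
assert struct.calcsize(INDEX_FORMAT_V1) == 64

entry = struct.pack(
    INDEX_FORMAT_V1,
    (0 << 16) | 0,  # offset/flags word (offset 0, no flags)
    11,             # compressed length
    24,             # uncompressed length
    0,              # delta base rev
    0,              # link rev
    -1,             # parent 1 rev (nullrev)
    -1,             # parent 2 rev (nullrev)
    b"\x00" * 20,   # nodeid
)
offset_flags, comp_len, uncomp_len = struct.unpack(INDEX_FORMAT_V1, entry)[:3]
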
56 @property
57 @property
57 def nodemap(self):
58 def nodemap(self):
58 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
59 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
59 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
60 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
60 return self._nodemap
61 return self._nodemap
61
62
62 @util.propertycache
63 @util.propertycache
63 def _nodemap(self):
64 def _nodemap(self):
64 nodemap = nodemaputil.NodeMap({nullid: nullrev})
65 nodemap = nodemaputil.NodeMap({nullid: nullrev})
65 for r in range(0, len(self)):
66 for r in range(0, len(self)):
66 n = self[r][7]
67 n = self[r][7]
67 nodemap[n] = r
68 nodemap[n] = r
68 return nodemap
69 return nodemap
69
70
70 def has_node(self, node):
71 def has_node(self, node):
71 """return True if the node exist in the index"""
72 """return True if the node exist in the index"""
72 return node in self._nodemap
73 return node in self._nodemap
73
74
74 def rev(self, node):
75 def rev(self, node):
75 """return a revision for a node
76 """return a revision for a node
76
77
77 If the node is unknown, raise a RevlogError"""
78 If the node is unknown, raise a RevlogError"""
78 return self._nodemap[node]
79 return self._nodemap[node]
79
80
80 def get_rev(self, node):
81 def get_rev(self, node):
81 """return a revision for a node
82 """return a revision for a node
82
83
83 If the node is unknown, return None"""
84 If the node is unknown, return None"""
84 return self._nodemap.get(node)
85 return self._nodemap.get(node)
85
86
86 def _stripnodes(self, start):
87 def _stripnodes(self, start):
87 if '_nodemap' in vars(self):
88 if '_nodemap' in vars(self):
88 for r in range(start, len(self)):
89 for r in range(start, len(self)):
89 n = self[r][7]
90 n = self[r][7]
90 del self._nodemap[n]
91 del self._nodemap[n]
91
92
92 def clearcaches(self):
93 def clearcaches(self):
93 self.__dict__.pop('_nodemap', None)
94 self.__dict__.pop('_nodemap', None)
94
95
95 def __len__(self):
96 def __len__(self):
96 return self._lgt + len(self._extra)
97 return self._lgt + len(self._extra)
97
98
98 def append(self, tup):
99 def append(self, tup):
99 if '_nodemap' in vars(self):
100 if '_nodemap' in vars(self):
100 self._nodemap[tup[7]] = len(self)
101 self._nodemap[tup[7]] = len(self)
101 data = _pack(self.index_format, *tup)
102 data = _pack(self.index_format, *tup)
102 self._extra.append(data)
103 self._extra.append(data)
103
104
104 def _check_index(self, i):
105 def _check_index(self, i):
105 if not isinstance(i, int):
106 if not isinstance(i, int):
106 raise TypeError(b"expecting int indexes")
107 raise TypeError(b"expecting int indexes")
107 if i < 0 or i >= len(self):
108 if i < 0 or i >= len(self):
108 raise IndexError
109 raise IndexError
109
110
110 def __getitem__(self, i):
111 def __getitem__(self, i):
111 if i == -1:
112 if i == -1:
112 return self.null_item
113 return self.null_item
113 self._check_index(i)
114 self._check_index(i)
114 if i >= self._lgt:
115 if i >= self._lgt:
115 data = self._extra[i - self._lgt]
116 data = self._extra[i - self._lgt]
116 else:
117 else:
117 index = self._calculate_index(i)
118 index = self._calculate_index(i)
118 data = self._data[index : index + self.index_size]
119 data = self._data[index : index + self.index_size]
119 r = _unpack(self.index_format, data)
120 r = _unpack(self.index_format, data)
120 if self._lgt and i == 0:
121 if self._lgt and i == 0:
121 r = (offset_type(0, gettype(r[0])),) + r[1:]
122 r = (offset_type(0, gettype(r[0])),) + r[1:]
122 return r
123 return r
123
124
124
125
125 class IndexObject(BaseIndexObject):
126 class IndexObject(BaseIndexObject):
126 def __init__(self, data):
127 def __init__(self, data):
127 assert len(data) % self.index_size == 0
128 assert len(data) % self.index_size == 0
128 self._data = data
129 self._data = data
129 self._lgt = len(data) // self.index_size
130 self._lgt = len(data) // self.index_size
130 self._extra = []
131 self._extra = []
131
132
132 def _calculate_index(self, i):
133 def _calculate_index(self, i):
133 return i * self.index_size
134 return i * self.index_size
134
135
135 def __delitem__(self, i):
136 def __delitem__(self, i):
136 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
137 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
137 raise ValueError(b"deleting slices only supports a:-1 with step 1")
138 raise ValueError(b"deleting slices only supports a:-1 with step 1")
138 i = i.start
139 i = i.start
139 self._check_index(i)
140 self._check_index(i)
140 self._stripnodes(i)
141 self._stripnodes(i)
141 if i < self._lgt:
142 if i < self._lgt:
142 self._data = self._data[: i * self.index_size]
143 self._data = self._data[: i * self.index_size]
143 self._lgt = i
144 self._lgt = i
144 self._extra = []
145 self._extra = []
145 else:
146 else:
146 self._extra = self._extra[: i - self._lgt]
147 self._extra = self._extra[: i - self._lgt]
147
148
148
149
149 class PersistentNodeMapIndexObject(IndexObject):
150 class PersistentNodeMapIndexObject(IndexObject):
150 """a Debug oriented class to test persistent nodemap
151 """a Debug oriented class to test persistent nodemap
151
152
152 We need a simple python object to test API and higher level behavior. See
153 We need a simple python object to test API and higher level behavior. See
153 the Rust implementation for more serious usage. This should be used only
154 the Rust implementation for more serious usage. This should be used only
154 through the dedicated `devel.persistent-nodemap` config.
155 through the dedicated `devel.persistent-nodemap` config.
155 """
156 """
156
157
157 def nodemap_data_all(self):
158 def nodemap_data_all(self):
158 """Return bytes containing a full serialization of a nodemap
159 """Return bytes containing a full serialization of a nodemap
159
160
160 The nodemap should be valid for the full set of revisions in the
161 The nodemap should be valid for the full set of revisions in the
161 index."""
162 index."""
162 return nodemaputil.persistent_data(self)
163 return nodemaputil.persistent_data(self)
163
164
164 def nodemap_data_incremental(self):
165 def nodemap_data_incremental(self):
165 """Return bytes containing a incremental update to persistent nodemap
166 """Return bytes containing a incremental update to persistent nodemap
166
167
167 This contains the data for an append-only update of the data provided
168 This contains the data for an append-only update of the data provided
168 in the last call to `update_nodemap_data`.
169 in the last call to `update_nodemap_data`.
169 """
170 """
170 if self._nm_root is None:
171 if self._nm_root is None:
171 return None
172 return None
172 docket = self._nm_docket
173 docket = self._nm_docket
173 changed, data = nodemaputil.update_persistent_data(
174 changed, data = nodemaputil.update_persistent_data(
174 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
175 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
175 )
176 )
176
177
177 self._nm_root = self._nm_max_idx = self._nm_docket = None
178 self._nm_root = self._nm_max_idx = self._nm_docket = None
178 return docket, changed, data
179 return docket, changed, data
179
180
180 def update_nodemap_data(self, docket, nm_data):
181 def update_nodemap_data(self, docket, nm_data):
181 """provide full block of persisted binary data for a nodemap
182 """provide full block of persisted binary data for a nodemap
182
183
183 The data are expected to come from disk. See `nodemap_data_all` for a
184 The data are expected to come from disk. See `nodemap_data_all` for a
184 producer of such data."""
185 producer of such data."""
185 if nm_data is not None:
186 if nm_data is not None:
186 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
187 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
187 if self._nm_root:
188 if self._nm_root:
188 self._nm_docket = docket
189 self._nm_docket = docket
189 else:
190 else:
190 self._nm_root = self._nm_max_idx = self._nm_docket = None
191 self._nm_root = self._nm_max_idx = self._nm_docket = None
191
192
192
193
193 class InlinedIndexObject(BaseIndexObject):
194 class InlinedIndexObject(BaseIndexObject):
194 def __init__(self, data, inline=0):
195 def __init__(self, data, inline=0):
195 self._data = data
196 self._data = data
196 self._lgt = self._inline_scan(None)
197 self._lgt = self._inline_scan(None)
197 self._inline_scan(self._lgt)
198 self._inline_scan(self._lgt)
198 self._extra = []
199 self._extra = []
199
200
200 def _inline_scan(self, lgt):
201 def _inline_scan(self, lgt):
201 off = 0
202 off = 0
202 if lgt is not None:
203 if lgt is not None:
203 self._offsets = [0] * lgt
204 self._offsets = [0] * lgt
204 count = 0
205 count = 0
205 while off <= len(self._data) - self.index_size:
206 while off <= len(self._data) - self.index_size:
206 start = off + self.big_int_size
207 start = off + self.big_int_size
207 (s,) = struct.unpack(
208 (s,) = struct.unpack(
208 b'>i',
209 b'>i',
209 self._data[start : start + self.int_size],
210 self._data[start : start + self.int_size],
210 )
211 )
211 if lgt is not None:
212 if lgt is not None:
212 self._offsets[count] = off
213 self._offsets[count] = off
213 count += 1
214 count += 1
214 off += self.index_size + s
215 off += self.index_size + s
215 if off != len(self._data):
216 if off != len(self._data):
216 raise ValueError(b"corrupted data")
217 raise ValueError(b"corrupted data")
217 return count
218 return count
218
219
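
`_inline_scan` exists because an inline revlog interleaves each fixed-size index entry with that revision's variable-length data chunk, so entry positions cannot be computed by multiplication and must be discovered by walking the file. A simplified sketch of the walk, assuming 64-byte v1 entries and a hypothetical `data` bytes buffer (mirroring the loop above):

import struct

# inline layout: [entry 0][data 0][entry 1][data 1]...
offsets = []
off = 0
while off + 64 <= len(data):
    offsets.append(off)
    # the compressed data length sits right after the 8-byte offset word
    (comp_len,) = struct.unpack(b">i", data[off + 8 : off + 12])
    off += 64 + comp_len  # hop over this entry plus its data chunk
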
219 def __delitem__(self, i):
220 def __delitem__(self, i):
220 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
221 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
221 raise ValueError(b"deleting slices only supports a:-1 with step 1")
222 raise ValueError(b"deleting slices only supports a:-1 with step 1")
222 i = i.start
223 i = i.start
223 self._check_index(i)
224 self._check_index(i)
224 self._stripnodes(i)
225 self._stripnodes(i)
225 if i < self._lgt:
226 if i < self._lgt:
226 self._offsets = self._offsets[:i]
227 self._offsets = self._offsets[:i]
227 self._lgt = i
228 self._lgt = i
228 self._extra = []
229 self._extra = []
229 else:
230 else:
230 self._extra = self._extra[: i - self._lgt]
231 self._extra = self._extra[: i - self._lgt]
231
232
232 def _calculate_index(self, i):
233 def _calculate_index(self, i):
233 return self._offsets[i]
234 return self._offsets[i]
234
235
235
236
236 def parse_index2(data, inline, revlogv2=False):
237 def parse_index2(data, inline, revlogv2=False):
237 if not inline:
238 if not inline:
238 cls = IndexObject2 if revlogv2 else IndexObject
239 cls = IndexObject2 if revlogv2 else IndexObject
239 return cls(data), None
240 return cls(data), None
240 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
241 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
241 return cls(data, inline), (0, data)
242 return cls(data, inline), (0, data)
242
243
243
244
244 class Index2Mixin(object):
245 class Index2Mixin(object):
245 # 6 bytes: offset
246 # 6 bytes: offset
246 # 2 bytes: flags
247 # 2 bytes: flags
247 # 4 bytes: compressed length
248 # 4 bytes: compressed length
248 # 4 bytes: uncompressed length
249 # 4 bytes: uncompressed length
249 # 4 bytes: base rev
250 # 4 bytes: base rev
250 # 4 bytes: link rev
251 # 4 bytes: link rev
251 # 4 bytes: parent 1 rev
252 # 4 bytes: parent 1 rev
252 # 4 bytes: parent 2 rev
253 # 4 bytes: parent 2 rev
253 # 32 bytes: nodeid
254 # 32 bytes: nodeid
254 # 8 bytes: sidedata offset
255 # 8 bytes: sidedata offset
255 # 4 bytes: sidedata compressed length
256 # 4 bytes: sidedata compressed length
256 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
257 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
257 index_format = b">Qiiiiii20s12xQi20x"
258 index_format = b">Qiiiiii20s12xQi20x"
258 index_size = struct.calcsize(index_format)
259 index_size = struct.calcsize(index_format)
259 assert index_size == 96, index_size
260 assert index_size == 96, index_size
260 null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
261 null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
261
262
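
The v2 format string above is the v1 layout plus an 8-byte sidedata offset, a 4-byte sidedata length, and 20 bytes of padding. The arithmetic behind the `assert index_size == 96`, as a standalone sanity check:

import struct

V2_FORMAT = b">Qiiiiii20s12xQi20x"
# 8 (offset/flags) + 6*4 (ints) + 20 (nodeid) + 12 (pad) = 64 bytes of v1,
# + 8 (sidedata offset) + 4 (sidedata length) + 20 (pad) = 96 bytes total
assert struct.calcsize(V2_FORMAT) == 64 + 8 + 4 + 20 == 96
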
262 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
263 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
263 """
264 """
264 Replace an existing index entry's sidedata offset and length with new
265 Replace an existing index entry's sidedata offset and length with new
265 ones.
266 ones.
266 This cannot be used outside of the context of sidedata rewriting,
267 This cannot be used outside of the context of sidedata rewriting,
267 inside the transaction that creates the revision `i`.
268 inside the transaction that creates the revision `i`.
268 """
269 """
269 if i < 0:
270 if i < 0:
270 raise KeyError
271 raise KeyError
271 self._check_index(i)
272 self._check_index(i)
272 sidedata_format = b">Qi"
273 sidedata_format = b">Qi"
273 packed_size = struct.calcsize(sidedata_format)
274 packed_size = struct.calcsize(sidedata_format)
274 if i >= self._lgt:
275 if i >= self._lgt:
275 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
276 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
276 old = self._extra[i - self._lgt]
277 old = self._extra[i - self._lgt]
277 new = old[:64] + packed + old[64 + packed_size :]
278 new = old[:64] + packed + old[64 + packed_size :]
278 self._extra[i - self._lgt] = new
279 self._extra[i - self._lgt] = new
279 else:
280 else:
280 msg = b"cannot rewrite entries outside of this transaction"
281 msg = b"cannot rewrite entries outside of this transaction"
281 raise KeyError(msg)
282 raise KeyError(msg)
282
283
283
284
284 class IndexObject2(Index2Mixin, IndexObject):
285 class IndexObject2(Index2Mixin, IndexObject):
285 pass
286 pass
286
287
287
288
288 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
289 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
289 def _inline_scan(self, lgt):
290 def _inline_scan(self, lgt):
290 sidedata_length_pos = 72
291 sidedata_length_pos = 72
291 off = 0
292 off = 0
292 if lgt is not None:
293 if lgt is not None:
293 self._offsets = [0] * lgt
294 self._offsets = [0] * lgt
294 count = 0
295 count = 0
295 while off <= len(self._data) - self.index_size:
296 while off <= len(self._data) - self.index_size:
296 start = off + self.big_int_size
297 start = off + self.big_int_size
297 (data_size,) = struct.unpack(
298 (data_size,) = struct.unpack(
298 b'>i',
299 b'>i',
299 self._data[start : start + self.int_size],
300 self._data[start : start + self.int_size],
300 )
301 )
301 start = off + sidedata_length_pos
302 start = off + sidedata_length_pos
302 (side_data_size,) = struct.unpack(
303 (side_data_size,) = struct.unpack(
303 b'>i', self._data[start : start + self.int_size]
304 b'>i', self._data[start : start + self.int_size]
304 )
305 )
305 if lgt is not None:
306 if lgt is not None:
306 self._offsets[count] = off
307 self._offsets[count] = off
307 count += 1
308 count += 1
308 off += self.index_size + data_size + side_data_size
309 off += self.index_size + data_size + side_data_size
309 if off != len(self._data):
310 if off != len(self._data):
310 raise ValueError(b"corrupted data")
311 raise ValueError(b"corrupted data")
311 return count
312 return count
312
313
313
314
314 def parse_index_devel_nodemap(data, inline):
315 def parse_index_devel_nodemap(data, inline):
315 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
316 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
316 return PersistentNodeMapIndexObject(data), None
317 return PersistentNodeMapIndexObject(data), None
317
318
318
319
319 def parse_dirstate(dmap, copymap, st):
320 def parse_dirstate(dmap, copymap, st):
320 parents = [st[:20], st[20:40]]
321 parents = [st[:20], st[20:40]]
321 # dereference fields so they will be local in loop
322 # dereference fields so they will be local in loop
322 format = b">cllll"
323 format = b">cllll"
323 e_size = struct.calcsize(format)
324 e_size = struct.calcsize(format)
324 pos1 = 40
325 pos1 = 40
325 l = len(st)
326 l = len(st)
326
327
327 # the inner loop
328 # the inner loop
328 while pos1 < l:
329 while pos1 < l:
329 pos2 = pos1 + e_size
330 pos2 = pos1 + e_size
330 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
331 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
331 pos1 = pos2 + e[4]
332 pos1 = pos2 + e[4]
332 f = st[pos2:pos1]
333 f = st[pos2:pos1]
333 if b'\0' in f:
334 if b'\0' in f:
334 f, c = f.split(b'\0')
335 f, c = f.split(b'\0')
335 copymap[f] = c
336 copymap[f] = c
336 dmap[f] = e[:4]
337 dmap[f] = e[:4]
337 return parents
338 return parents
338
339
339
340
340 def pack_dirstate(dmap, copymap, pl, now):
341 def pack_dirstate(dmap, copymap, pl, now):
341 now = int(now)
342 now = int(now)
342 cs = stringio()
343 cs = stringio()
343 write = cs.write
344 write = cs.write
344 write(b"".join(pl))
345 write(b"".join(pl))
345 for f, e in pycompat.iteritems(dmap):
346 for f, e in pycompat.iteritems(dmap):
346 if e[0] == b'n' and e[3] == now:
347 if e[0] == b'n' and e[3] == now:
347 # The file was last modified "simultaneously" with the current
348 # The file was last modified "simultaneously" with the current
348 # write to dirstate (i.e. within the same second for file-
349 # write to dirstate (i.e. within the same second for file-
349 # systems with a granularity of 1 sec). This commonly happens
350 # systems with a granularity of 1 sec). This commonly happens
350 # for at least a couple of files on 'update'.
351 # for at least a couple of files on 'update'.
351 # The user could change the file without changing its size
352 # The user could change the file without changing its size
352 # within the same second. Invalidate the file's mtime in
353 # within the same second. Invalidate the file's mtime in
353 # dirstate, forcing future 'status' calls to compare the
354 # dirstate, forcing future 'status' calls to compare the
354 # contents of the file if the size is the same. This prevents
355 # contents of the file if the size is the same. This prevents
355 # mistakenly treating such files as clean.
356 # mistakenly treating such files as clean.
356 e = dirstatetuple(e[0], e[1], e[2], -1)
357 e = dirstatetuple(e[0], e[1], e[2], -1)
357 dmap[f] = e
358 dmap[f] = e
358
359
359 if f in copymap:
360 if f in copymap:
360 f = b"%s\0%s" % (f, copymap[f])
361 f = b"%s\0%s" % (f, copymap[f])
361 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
362 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
362 write(e)
363 write(e)
363 write(f)
364 write(f)
364 return cs.getvalue()
365 return cs.getvalue()
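
The mtime handling in `pack_dirstate` above deserves a concrete example: if a file's recorded mtime equals the second in which the dirstate itself is being written, a same-second edit after the write would be invisible to a size+mtime check, so the entry is stored with mtime -1 to force a content comparison on the next `status`. A minimal standalone sketch of that rule (hypothetical `(state, mode, size, mtime)` tuples, not the real dirstate API):

def invalidate_racy_mtimes(dmap, now):
    # entries modified "simultaneously" with the dirstate write get
    # mtime -1, forcing a later status to compare file contents
    for f, (state, mode, size, mtime) in dmap.items():
        if state == b'n' and mtime == int(now):
            dmap[f] = (state, mode, size, -1)

dmap = {b'a.txt': (b'n', 0o644, 12, 1700000000)}
invalidate_racy_mtimes(dmap, 1700000000)
assert dmap[b'a.txt'][3] == -1
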
@@ -1,3267 +1,3256 @@ mercurial/revlog.py
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullhex,
29 nullhex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 sha1nodeconstants,
32 sha1nodeconstants,
33 short,
33 short,
34 wdirfilenodeids,
34 wdirfilenodeids,
35 wdirhex,
35 wdirhex,
36 wdirid,
36 wdirid,
37 wdirrev,
37 wdirrev,
38 )
38 )
39 from .i18n import _
39 from .i18n import _
40 from .pycompat import getattr
40 from .pycompat import getattr
41 from .revlogutils.constants import (
41 from .revlogutils.constants import (
42 FLAG_GENERALDELTA,
42 FLAG_GENERALDELTA,
43 FLAG_INLINE_DATA,
43 FLAG_INLINE_DATA,
44 INDEX_ENTRY_V0,
44 INDEX_ENTRY_V0,
45 INDEX_ENTRY_V1,
45 REVLOGV0,
46 REVLOGV0,
46 REVLOGV1,
47 REVLOGV1,
47 REVLOGV1_FLAGS,
48 REVLOGV1_FLAGS,
48 REVLOGV2,
49 REVLOGV2,
49 REVLOGV2_FLAGS,
50 REVLOGV2_FLAGS,
50 REVLOG_DEFAULT_FLAGS,
51 REVLOG_DEFAULT_FLAGS,
51 REVLOG_DEFAULT_FORMAT,
52 REVLOG_DEFAULT_FORMAT,
52 REVLOG_DEFAULT_VERSION,
53 REVLOG_DEFAULT_VERSION,
53 )
54 )
54 from .revlogutils.flagutil import (
55 from .revlogutils.flagutil import (
55 REVIDX_DEFAULT_FLAGS,
56 REVIDX_DEFAULT_FLAGS,
56 REVIDX_ELLIPSIS,
57 REVIDX_ELLIPSIS,
57 REVIDX_EXTSTORED,
58 REVIDX_EXTSTORED,
58 REVIDX_FLAGS_ORDER,
59 REVIDX_FLAGS_ORDER,
59 REVIDX_HASCOPIESINFO,
60 REVIDX_HASCOPIESINFO,
60 REVIDX_ISCENSORED,
61 REVIDX_ISCENSORED,
61 REVIDX_RAWTEXT_CHANGING_FLAGS,
62 REVIDX_RAWTEXT_CHANGING_FLAGS,
62 REVIDX_SIDEDATA,
63 REVIDX_SIDEDATA,
63 )
64 )
64 from .thirdparty import attr
65 from .thirdparty import attr
65 from . import (
66 from . import (
66 ancestor,
67 ancestor,
67 dagop,
68 dagop,
68 error,
69 error,
69 mdiff,
70 mdiff,
70 policy,
71 policy,
71 pycompat,
72 pycompat,
72 templatefilters,
73 templatefilters,
73 util,
74 util,
74 )
75 )
75 from .interfaces import (
76 from .interfaces import (
76 repository,
77 repository,
77 util as interfaceutil,
78 util as interfaceutil,
78 )
79 )
79 from .revlogutils import (
80 from .revlogutils import (
80 deltas as deltautil,
81 deltas as deltautil,
81 flagutil,
82 flagutil,
82 nodemap as nodemaputil,
83 nodemap as nodemaputil,
83 sidedata as sidedatautil,
84 sidedata as sidedatautil,
84 )
85 )
85 from .utils import (
86 from .utils import (
86 storageutil,
87 storageutil,
87 stringutil,
88 stringutil,
88 )
89 )
89 from .pure import parsers as pureparsers
90 from .pure import parsers as pureparsers
90
91
91 # blanket usage of all the names to prevent pyflakes warnings
92 # blanket usage of all the names to prevent pyflakes warnings
92 # We need these names available in the module for extensions.
93 # We need these names available in the module for extensions.
93 REVLOGV0
94 REVLOGV0
94 REVLOGV1
95 REVLOGV1
95 REVLOGV2
96 REVLOGV2
96 FLAG_INLINE_DATA
97 FLAG_INLINE_DATA
97 FLAG_GENERALDELTA
98 FLAG_GENERALDELTA
98 REVLOG_DEFAULT_FLAGS
99 REVLOG_DEFAULT_FLAGS
99 REVLOG_DEFAULT_FORMAT
100 REVLOG_DEFAULT_FORMAT
100 REVLOG_DEFAULT_VERSION
101 REVLOG_DEFAULT_VERSION
101 REVLOGV1_FLAGS
102 REVLOGV1_FLAGS
102 REVLOGV2_FLAGS
103 REVLOGV2_FLAGS
103 REVIDX_ISCENSORED
104 REVIDX_ISCENSORED
104 REVIDX_ELLIPSIS
105 REVIDX_ELLIPSIS
105 REVIDX_SIDEDATA
106 REVIDX_SIDEDATA
106 REVIDX_HASCOPIESINFO
107 REVIDX_HASCOPIESINFO
107 REVIDX_EXTSTORED
108 REVIDX_EXTSTORED
108 REVIDX_DEFAULT_FLAGS
109 REVIDX_DEFAULT_FLAGS
109 REVIDX_FLAGS_ORDER
110 REVIDX_FLAGS_ORDER
110 REVIDX_RAWTEXT_CHANGING_FLAGS
111 REVIDX_RAWTEXT_CHANGING_FLAGS
111
112
112 parsers = policy.importmod('parsers')
113 parsers = policy.importmod('parsers')
113 rustancestor = policy.importrust('ancestor')
114 rustancestor = policy.importrust('ancestor')
114 rustdagop = policy.importrust('dagop')
115 rustdagop = policy.importrust('dagop')
115 rustrevlog = policy.importrust('revlog')
116 rustrevlog = policy.importrust('revlog')
116
117
117 # Aliased for performance.
118 # Aliased for performance.
118 _zlibdecompress = zlib.decompress
119 _zlibdecompress = zlib.decompress
119
120
120 # max size of revlog with inline data
121 # max size of revlog with inline data
121 _maxinline = 131072
122 _maxinline = 131072
122 _chunksize = 1048576
123 _chunksize = 1048576
123
124
124 # Flag processors for REVIDX_ELLIPSIS.
125 # Flag processors for REVIDX_ELLIPSIS.
125 def ellipsisreadprocessor(rl, text):
126 def ellipsisreadprocessor(rl, text):
126 return text, False
127 return text, False
127
128
128
129
129 def ellipsiswriteprocessor(rl, text):
130 def ellipsiswriteprocessor(rl, text):
130 return text, False
131 return text, False
131
132
132
133
133 def ellipsisrawprocessor(rl, text):
134 def ellipsisrawprocessor(rl, text):
134 return False
135 return False
135
136
136
137
137 ellipsisprocessor = (
138 ellipsisprocessor = (
138 ellipsisreadprocessor,
139 ellipsisreadprocessor,
139 ellipsiswriteprocessor,
140 ellipsiswriteprocessor,
140 ellipsisrawprocessor,
141 ellipsisrawprocessor,
141 )
142 )
142
143
143
144
144 def getoffset(q):
145 def getoffset(q):
145 return int(q >> 16)
146 return int(q >> 16)
146
147
147
148
148 def gettype(q):
149 def gettype(q):
149 return int(q & 0xFFFF)
150 return int(q & 0xFFFF)
150
151
151
152
152 def offset_type(offset, type):
153 def offset_type(offset, type):
153 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
154 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
154 raise ValueError(b'unknown revlog index flags')
155 raise ValueError(b'unknown revlog index flags')
155 return int(int(offset) << 16 | type)
156 return int(int(offset) << 16 | type)
156
157
157
158
158 def _verify_revision(rl, skipflags, state, node):
159 def _verify_revision(rl, skipflags, state, node):
159 """Verify the integrity of the given revlog ``node`` while providing a hook
160 """Verify the integrity of the given revlog ``node`` while providing a hook
160 point for extensions to influence the operation."""
161 point for extensions to influence the operation."""
161 if skipflags:
162 if skipflags:
162 state[b'skipread'].add(node)
163 state[b'skipread'].add(node)
163 else:
164 else:
164 # Side-effect: read content and verify hash.
165 # Side-effect: read content and verify hash.
165 rl.revision(node)
166 rl.revision(node)
166
167
167
168
168 # True if a fast implementation for persistent-nodemap is available
169 # True if a fast implementation for persistent-nodemap is available
169 #
170 #
170 # We also consider the "pure" python implementation to be "fast", because
171 # We also consider the "pure" python implementation to be "fast", because
171 # people using pure don't really have performance considerations (and a
172 # people using pure don't really have performance considerations (and a
172 # wheelbarrow of other slowness sources)
173 # wheelbarrow of other slowness sources)
173 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
174 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
174 parsers, 'BaseIndexObject'
175 parsers, 'BaseIndexObject'
175 )
176 )
176
177
177
178
178 @attr.s(slots=True, frozen=True)
179 @attr.s(slots=True, frozen=True)
179 class _revisioninfo(object):
180 class _revisioninfo(object):
180 """Information about a revision that allows building its fulltext
181 """Information about a revision that allows building its fulltext
181 node: expected hash of the revision
182 node: expected hash of the revision
182 p1, p2: parent revs of the revision
183 p1, p2: parent revs of the revision
183 btext: built text cache consisting of a one-element list
184 btext: built text cache consisting of a one-element list
184 cachedelta: (baserev, uncompressed_delta) or None
185 cachedelta: (baserev, uncompressed_delta) or None
185 flags: flags associated to the revision storage
186 flags: flags associated to the revision storage
186
187
187 One of btext[0] or cachedelta must be set.
188 One of btext[0] or cachedelta must be set.
188 """
189 """
189
190
190 node = attr.ib()
191 node = attr.ib()
191 p1 = attr.ib()
192 p1 = attr.ib()
192 p2 = attr.ib()
193 p2 = attr.ib()
193 btext = attr.ib()
194 btext = attr.ib()
194 textlen = attr.ib()
195 textlen = attr.ib()
195 cachedelta = attr.ib()
196 cachedelta = attr.ib()
196 flags = attr.ib()
197 flags = attr.ib()
197
198
198
199
199 @interfaceutil.implementer(repository.irevisiondelta)
200 @interfaceutil.implementer(repository.irevisiondelta)
200 @attr.s(slots=True)
201 @attr.s(slots=True)
201 class revlogrevisiondelta(object):
202 class revlogrevisiondelta(object):
202 node = attr.ib()
203 node = attr.ib()
203 p1node = attr.ib()
204 p1node = attr.ib()
204 p2node = attr.ib()
205 p2node = attr.ib()
205 basenode = attr.ib()
206 basenode = attr.ib()
206 flags = attr.ib()
207 flags = attr.ib()
207 baserevisionsize = attr.ib()
208 baserevisionsize = attr.ib()
208 revision = attr.ib()
209 revision = attr.ib()
209 delta = attr.ib()
210 delta = attr.ib()
210 sidedata = attr.ib()
211 sidedata = attr.ib()
211 linknode = attr.ib(default=None)
212 linknode = attr.ib(default=None)
212
213
213
214
214 @interfaceutil.implementer(repository.iverifyproblem)
215 @interfaceutil.implementer(repository.iverifyproblem)
215 @attr.s(frozen=True)
216 @attr.s(frozen=True)
216 class revlogproblem(object):
217 class revlogproblem(object):
217 warning = attr.ib(default=None)
218 warning = attr.ib(default=None)
218 error = attr.ib(default=None)
219 error = attr.ib(default=None)
219 node = attr.ib(default=None)
220 node = attr.ib(default=None)
220
221
221
222
222 class revlogoldindex(list):
223 class revlogoldindex(list):
223 @property
224 @property
224 def nodemap(self):
225 def nodemap(self):
225 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
226 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
226 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
227 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
227 return self._nodemap
228 return self._nodemap
228
229
229 @util.propertycache
230 @util.propertycache
230 def _nodemap(self):
231 def _nodemap(self):
231 nodemap = nodemaputil.NodeMap({nullid: nullrev})
232 nodemap = nodemaputil.NodeMap({nullid: nullrev})
232 for r in range(0, len(self)):
233 for r in range(0, len(self)):
233 n = self[r][7]
234 n = self[r][7]
234 nodemap[n] = r
235 nodemap[n] = r
235 return nodemap
236 return nodemap
236
237
237 def has_node(self, node):
238 def has_node(self, node):
238 """return True if the node exist in the index"""
239 """return True if the node exist in the index"""
239 return node in self._nodemap
240 return node in self._nodemap
240
241
241 def rev(self, node):
242 def rev(self, node):
242 """return a revision for a node
243 """return a revision for a node
243
244
244 If the node is unknown, raise a RevlogError"""
245 If the node is unknown, raise a RevlogError"""
245 return self._nodemap[node]
246 return self._nodemap[node]
246
247
247 def get_rev(self, node):
248 def get_rev(self, node):
248 """return a revision for a node
249 """return a revision for a node
249
250
250 If the node is unknown, return None"""
251 If the node is unknown, return None"""
251 return self._nodemap.get(node)
252 return self._nodemap.get(node)
252
253
253 def append(self, tup):
254 def append(self, tup):
254 self._nodemap[tup[7]] = len(self)
255 self._nodemap[tup[7]] = len(self)
255 super(revlogoldindex, self).append(tup)
256 super(revlogoldindex, self).append(tup)
256
257
257 def __delitem__(self, i):
258 def __delitem__(self, i):
258 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
259 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
259 raise ValueError(b"deleting slices only supports a:-1 with step 1")
260 raise ValueError(b"deleting slices only supports a:-1 with step 1")
260 for r in pycompat.xrange(i.start, len(self)):
261 for r in pycompat.xrange(i.start, len(self)):
261 del self._nodemap[self[r][7]]
262 del self._nodemap[self[r][7]]
262 super(revlogoldindex, self).__delitem__(i)
263 super(revlogoldindex, self).__delitem__(i)
263
264
264 def clearcaches(self):
265 def clearcaches(self):
265 self.__dict__.pop('_nodemap', None)
266 self.__dict__.pop('_nodemap', None)
266
267
267 def __getitem__(self, i):
268 def __getitem__(self, i):
268 if i == -1:
269 if i == -1:
269 return (0, 0, 0, -1, -1, -1, -1, nullid)
270 return (0, 0, 0, -1, -1, -1, -1, nullid)
270 return list.__getitem__(self, i)
271 return list.__getitem__(self, i)
271
272
272
273
273 class revlogoldio(object):
274 class revlogoldio(object):
274 def __init__(self):
275 def __init__(self):
275 self.size = INDEX_ENTRY_V0.size
276 self.size = INDEX_ENTRY_V0.size
276
277
277 def parseindex(self, data, inline):
278 def parseindex(self, data, inline):
278 s = self.size
279 s = self.size
279 index = []
280 index = []
280 nodemap = nodemaputil.NodeMap({nullid: nullrev})
281 nodemap = nodemaputil.NodeMap({nullid: nullrev})
281 n = off = 0
282 n = off = 0
282 l = len(data)
283 l = len(data)
283 while off + s <= l:
284 while off + s <= l:
284 cur = data[off : off + s]
285 cur = data[off : off + s]
285 off += s
286 off += s
286 e = INDEX_ENTRY_V0.unpack(cur)
287 e = INDEX_ENTRY_V0.unpack(cur)
287 # transform to revlogv1 format
288 # transform to revlogv1 format
288 e2 = (
289 e2 = (
289 offset_type(e[0], 0),
290 offset_type(e[0], 0),
290 e[1],
291 e[1],
291 -1,
292 -1,
292 e[2],
293 e[2],
293 e[3],
294 e[3],
294 nodemap.get(e[4], nullrev),
295 nodemap.get(e[4], nullrev),
295 nodemap.get(e[5], nullrev),
296 nodemap.get(e[5], nullrev),
296 e[6],
297 e[6],
297 )
298 )
298 index.append(e2)
299 index.append(e2)
299 nodemap[e[6]] = n
300 nodemap[e[6]] = n
300 n += 1
301 n += 1
301
302
302 index = revlogoldindex(index)
303 index = revlogoldindex(index)
303 return index, None
304 return index, None
304
305
305 def packentry(self, entry, node, version, rev):
306 def packentry(self, entry, node, version, rev):
306 """return the binary representation of an entry
307 """return the binary representation of an entry
307
308
308 entry: a tuple containing all the values (see index.__getitem__)
309 entry: a tuple containing all the values (see index.__getitem__)
309 node: a callback to convert a revision to nodeid
310 node: a callback to convert a revision to nodeid
310 version: the changelog version
311 version: the changelog version
311 rev: the revision number
312 rev: the revision number
312 """
313 """
313 if gettype(entry[0]):
314 if gettype(entry[0]):
314 raise error.RevlogError(
315 raise error.RevlogError(
315 _(b'index entry flags need revlog version 1')
316 _(b'index entry flags need revlog version 1')
316 )
317 )
317 e2 = (
318 e2 = (
318 getoffset(entry[0]),
319 getoffset(entry[0]),
319 entry[1],
320 entry[1],
320 entry[3],
321 entry[3],
321 entry[4],
322 entry[4],
322 node(entry[5]),
323 node(entry[5]),
323 node(entry[6]),
324 node(entry[6]),
324 entry[7],
325 entry[7],
325 )
326 )
326 return INDEX_ENTRY_V0.pack(*e2)
327 return INDEX_ENTRY_V0.pack(*e2)
327
328
328
329
329 # index ng:
330 # 6 bytes: offset
331 # 2 bytes: flags
332 # 4 bytes: compressed length
333 # 4 bytes: uncompressed length
334 # 4 bytes: base rev
335 # 4 bytes: link rev
336 # 4 bytes: parent 1 rev
337 # 4 bytes: parent 2 rev
338 # 32 bytes: nodeid
339 indexformatng = struct.Struct(b">Qiiiiii20s12x")
340 indexformatng_pack = indexformatng.pack
341 versionformat = struct.Struct(b">I")
330 versionformat = struct.Struct(b">I")
342 versionformat_pack = versionformat.pack
331 versionformat_pack = versionformat.pack
343 versionformat_unpack = versionformat.unpack
332 versionformat_unpack = versionformat.unpack
344
333
345 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
334 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
346 # signed integer)
335 # signed integer)
347 _maxentrysize = 0x7FFFFFFF
336 _maxentrysize = 0x7FFFFFFF
348
337
349
338
350 class revlogio(object):
339 class revlogio(object):
351 def __init__(self):
340 def __init__(self):
352 self.size = indexformatng.size
341 self.size = INDEX_ENTRY_V1.size
353
342
354 def parseindex(self, data, inline):
343 def parseindex(self, data, inline):
355 # call the C implementation to parse the index data
344 # call the C implementation to parse the index data
356 index, cache = parsers.parse_index2(data, inline)
345 index, cache = parsers.parse_index2(data, inline)
357 return index, cache
346 return index, cache
358
347
359 def packentry(self, entry, node, version, rev):
348 def packentry(self, entry, node, version, rev):
360 p = indexformatng_pack(*entry)
349 p = INDEX_ENTRY_V1.pack(*entry)
361 if rev == 0:
350 if rev == 0:
362 p = versionformat_pack(version) + p[4:]
351 p = versionformat_pack(version) + p[4:]
363 return p
352 return p
364
353
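
The `rev == 0` branch of `packentry` encodes a quirk shared by the v1 and v2 formats: a revlog has no standalone header, so the 4-byte version/flags word overwrites the first 4 bytes of entry 0 (which would otherwise just encode offset 0), and readers recover it by masking, as `_loadindex` does below with `& 0xFFFF`. A standalone sketch of both directions (constants inlined for illustration):

import struct

REVLOGV1 = 1
FLAG_INLINE_DATA = 1 << 16

entry0 = struct.pack(b">Qiiiiii20s12x", 0, 0, 0, 0, 0, -1, -1, b"\x00" * 20)
header = struct.pack(b">I", REVLOGV1 | FLAG_INLINE_DATA)
on_disk = header + entry0[4:]  # what packentry writes for rev 0

(versionflags,) = struct.unpack(b">I", on_disk[:4])
assert versionflags & 0xFFFF == REVLOGV1   # format version
assert versionflags & FLAG_INLINE_DATA     # feature flag set
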
365
354
366 indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
355 indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
367 indexformatv2_pack = indexformatv2.pack
356 indexformatv2_pack = indexformatv2.pack
368
357
369
358
370 class revlogv2io(object):
359 class revlogv2io(object):
371 def __init__(self):
360 def __init__(self):
372 self.size = indexformatv2.size
361 self.size = indexformatv2.size
373
362
374 def parseindex(self, data, inline):
363 def parseindex(self, data, inline):
375 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
364 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
376 return index, cache
365 return index, cache
377
366
378 def packentry(self, entry, node, version, rev):
367 def packentry(self, entry, node, version, rev):
379 p = indexformatv2_pack(*entry)
368 p = indexformatv2_pack(*entry)
380 if rev == 0:
369 if rev == 0:
381 p = versionformat_pack(version) + p[4:]
370 p = versionformat_pack(version) + p[4:]
382 return p
371 return p
383
372
384
373
385 NodemapRevlogIO = None
374 NodemapRevlogIO = None
386
375
387 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
376 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
388
377
389 class NodemapRevlogIO(revlogio):
378 class NodemapRevlogIO(revlogio):
390 """A debug oriented IO class that return a PersistentNodeMapIndexObject
379 """A debug oriented IO class that return a PersistentNodeMapIndexObject
391
380
392 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
381 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
393 """
382 """
394
383
395 def parseindex(self, data, inline):
384 def parseindex(self, data, inline):
396 index, cache = parsers.parse_index_devel_nodemap(data, inline)
385 index, cache = parsers.parse_index_devel_nodemap(data, inline)
397 return index, cache
386 return index, cache
398
387
399
388
400 class rustrevlogio(revlogio):
389 class rustrevlogio(revlogio):
401 def parseindex(self, data, inline):
390 def parseindex(self, data, inline):
402 index, cache = super(rustrevlogio, self).parseindex(data, inline)
391 index, cache = super(rustrevlogio, self).parseindex(data, inline)
403 return rustrevlog.MixedIndex(index), cache
392 return rustrevlog.MixedIndex(index), cache
404
393
405
394
406 class revlog(object):
395 class revlog(object):
407 """
396 """
408 the underlying revision storage object
397 the underlying revision storage object
409
398
410 A revlog consists of two parts, an index and the revision data.
399 A revlog consists of two parts, an index and the revision data.
411
400
412 The index is a file with a fixed record size containing
401 The index is a file with a fixed record size containing
413 information on each revision, including its nodeid (hash), the
402 information on each revision, including its nodeid (hash), the
414 nodeids of its parents, the position and offset of its data within
403 nodeids of its parents, the position and offset of its data within
415 the data file, and the revision it's based on. Finally, each entry
404 the data file, and the revision it's based on. Finally, each entry
416 contains a linkrev entry that can serve as a pointer to external
405 contains a linkrev entry that can serve as a pointer to external
417 data.
406 data.
418
407
419 The revision data itself is a linear collection of data chunks.
408 The revision data itself is a linear collection of data chunks.
420 Each chunk represents a revision and is usually represented as a
409 Each chunk represents a revision and is usually represented as a
421 delta against the previous chunk. To bound lookup time, runs of
410 delta against the previous chunk. To bound lookup time, runs of
422 deltas are limited to about 2 times the length of the original
411 deltas are limited to about 2 times the length of the original
423 version data. This makes retrieval of a version proportional to
412 version data. This makes retrieval of a version proportional to
424 its size, or O(1) relative to the number of revisions.
413 its size, or O(1) relative to the number of revisions.
425
414
426 Both pieces of the revlog are written to in an append-only
415 Both pieces of the revlog are written to in an append-only
427 fashion, which means we never need to rewrite a file to insert or
416 fashion, which means we never need to rewrite a file to insert or
428 remove data, and can use some simple techniques to avoid the need
417 remove data, and can use some simple techniques to avoid the need
429 for locking while reading.
418 for locking while reading.
430
419
431 If checkambig, indexfile is opened with checkambig=True at
420 If checkambig, indexfile is opened with checkambig=True at
432 writing, to avoid file stat ambiguity.
421 writing, to avoid file stat ambiguity.
433
422
434 If mmaplargeindex is True, and an mmapindexthreshold is set, the
423 If mmaplargeindex is True, and an mmapindexthreshold is set, the
435 index will be mmapped rather than read if it is larger than the
424 index will be mmapped rather than read if it is larger than the
436 configured threshold.
425 configured threshold.
437
426
438 If censorable is True, the revlog can have censored revisions.
427 If censorable is True, the revlog can have censored revisions.
439
428
440 If `upperboundcomp` is not None, this is the expected maximal gain from
429 If `upperboundcomp` is not None, this is the expected maximal gain from
441 compression for the data content.
430 compression for the data content.
442
431
443 `concurrencychecker` is an optional function that receives 3 arguments: a
432 `concurrencychecker` is an optional function that receives 3 arguments: a
444 file handle, a filename, and an expected position. It should check whether
433 file handle, a filename, and an expected position. It should check whether
445 the current position in the file handle is valid, and log/warn/fail (by
434 the current position in the file handle is valid, and log/warn/fail (by
446 raising).
435 raising).
447 """
436 """
448
437
449 _flagserrorclass = error.RevlogError
438 _flagserrorclass = error.RevlogError
450
439
451 def __init__(
440 def __init__(
452 self,
441 self,
453 opener,
442 opener,
454 indexfile,
443 indexfile,
455 datafile=None,
444 datafile=None,
456 checkambig=False,
445 checkambig=False,
457 mmaplargeindex=False,
446 mmaplargeindex=False,
458 censorable=False,
447 censorable=False,
459 upperboundcomp=None,
448 upperboundcomp=None,
460 persistentnodemap=False,
449 persistentnodemap=False,
461 concurrencychecker=None,
450 concurrencychecker=None,
462 ):
451 ):
463 """
452 """
464 create a revlog object
453 create a revlog object
465
454
466 opener is a function that abstracts the file opening operation
455 opener is a function that abstracts the file opening operation
467 and can be used to implement COW semantics or the like.
456 and can be used to implement COW semantics or the like.
468
457
469 """
458 """
470 self.upperboundcomp = upperboundcomp
459 self.upperboundcomp = upperboundcomp
471 self.indexfile = indexfile
460 self.indexfile = indexfile
472 self.datafile = datafile or (indexfile[:-2] + b".d")
461 self.datafile = datafile or (indexfile[:-2] + b".d")
473 self.nodemap_file = None
462 self.nodemap_file = None
474 if persistentnodemap:
463 if persistentnodemap:
475 self.nodemap_file = nodemaputil.get_nodemap_file(
464 self.nodemap_file = nodemaputil.get_nodemap_file(
476 opener, self.indexfile
465 opener, self.indexfile
477 )
466 )
478
467
479 self.opener = opener
468 self.opener = opener
480 # When True, indexfile is opened with checkambig=True at writing, to
469 # When True, indexfile is opened with checkambig=True at writing, to
481 # avoid file stat ambiguity.
470 # avoid file stat ambiguity.
482 self._checkambig = checkambig
471 self._checkambig = checkambig
483 self._mmaplargeindex = mmaplargeindex
472 self._mmaplargeindex = mmaplargeindex
484 self._censorable = censorable
473 self._censorable = censorable
485 # 3-tuple of (node, rev, text) for a raw revision.
474 # 3-tuple of (node, rev, text) for a raw revision.
486 self._revisioncache = None
475 self._revisioncache = None
487 # Maps rev to chain base rev.
476 # Maps rev to chain base rev.
488 self._chainbasecache = util.lrucachedict(100)
477 self._chainbasecache = util.lrucachedict(100)
489 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
478 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
490 self._chunkcache = (0, b'')
479 self._chunkcache = (0, b'')
491 # How much data to read and cache into the raw revlog data cache.
480 # How much data to read and cache into the raw revlog data cache.
492 self._chunkcachesize = 65536
481 self._chunkcachesize = 65536
493 self._maxchainlen = None
482 self._maxchainlen = None
494 self._deltabothparents = True
483 self._deltabothparents = True
495 self.index = None
484 self.index = None
496 self._nodemap_docket = None
485 self._nodemap_docket = None
497 # Mapping of partial identifiers to full nodes.
486 # Mapping of partial identifiers to full nodes.
498 self._pcache = {}
487 self._pcache = {}
499 # Mapping of revision integer to full node.
488 # Mapping of revision integer to full node.
500 self._compengine = b'zlib'
489 self._compengine = b'zlib'
501 self._compengineopts = {}
490 self._compengineopts = {}
502 self._maxdeltachainspan = -1
491 self._maxdeltachainspan = -1
503 self._withsparseread = False
492 self._withsparseread = False
504 self._sparserevlog = False
493 self._sparserevlog = False
505 self._srdensitythreshold = 0.50
494 self._srdensitythreshold = 0.50
506 self._srmingapsize = 262144
495 self._srmingapsize = 262144
507
496
508 # Make copy of flag processors so each revlog instance can support
497 # Make copy of flag processors so each revlog instance can support
509 # custom flags.
498 # custom flags.
510 self._flagprocessors = dict(flagutil.flagprocessors)
499 self._flagprocessors = dict(flagutil.flagprocessors)
511
500
512 # 2-tuple of file handles being used for active writing.
501 # 2-tuple of file handles being used for active writing.
513 self._writinghandles = None
502 self._writinghandles = None
514
503
515 self._loadindex()
504 self._loadindex()
516
505
517 self._concurrencychecker = concurrencychecker
506 self._concurrencychecker = concurrencychecker
518
507
519 def _loadindex(self):
508 def _loadindex(self):
520 mmapindexthreshold = None
509 mmapindexthreshold = None
521 opts = self.opener.options
510 opts = self.opener.options
522
511
523 if b'revlogv2' in opts:
512 if b'revlogv2' in opts:
524 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
513 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
525 elif b'revlogv1' in opts:
514 elif b'revlogv1' in opts:
526 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
515 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
527 if b'generaldelta' in opts:
516 if b'generaldelta' in opts:
528 newversionflags |= FLAG_GENERALDELTA
517 newversionflags |= FLAG_GENERALDELTA
529 elif b'revlogv0' in self.opener.options:
518 elif b'revlogv0' in self.opener.options:
530 newversionflags = REVLOGV0
519 newversionflags = REVLOGV0
531 else:
520 else:
532 newversionflags = REVLOG_DEFAULT_VERSION
521 newversionflags = REVLOG_DEFAULT_VERSION
533
522
534 if b'chunkcachesize' in opts:
523 if b'chunkcachesize' in opts:
535 self._chunkcachesize = opts[b'chunkcachesize']
524 self._chunkcachesize = opts[b'chunkcachesize']
536 if b'maxchainlen' in opts:
525 if b'maxchainlen' in opts:
537 self._maxchainlen = opts[b'maxchainlen']
526 self._maxchainlen = opts[b'maxchainlen']
538 if b'deltabothparents' in opts:
527 if b'deltabothparents' in opts:
539 self._deltabothparents = opts[b'deltabothparents']
528 self._deltabothparents = opts[b'deltabothparents']
540 self._lazydelta = bool(opts.get(b'lazydelta', True))
529 self._lazydelta = bool(opts.get(b'lazydelta', True))
541 self._lazydeltabase = False
530 self._lazydeltabase = False
542 if self._lazydelta:
531 if self._lazydelta:
543 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
532 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
544 if b'compengine' in opts:
533 if b'compengine' in opts:
545 self._compengine = opts[b'compengine']
534 self._compengine = opts[b'compengine']
546 if b'zlib.level' in opts:
535 if b'zlib.level' in opts:
547 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
536 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
548 if b'zstd.level' in opts:
537 if b'zstd.level' in opts:
549 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
538 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
550 if b'maxdeltachainspan' in opts:
539 if b'maxdeltachainspan' in opts:
551 self._maxdeltachainspan = opts[b'maxdeltachainspan']
540 self._maxdeltachainspan = opts[b'maxdeltachainspan']
552 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
541 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
553 mmapindexthreshold = opts[b'mmapindexthreshold']
542 mmapindexthreshold = opts[b'mmapindexthreshold']
554 self.hassidedata = bool(opts.get(b'side-data', False))
543 self.hassidedata = bool(opts.get(b'side-data', False))
555 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

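        # The u32 header packs the feature flags into the high 16 bits and
        # the format version into the low 16 bits.  For instance, an
        # inline, general-delta v1 revlog starts with
        #     FLAG_INLINE_DATA | FLAG_GENERALDELTA | REVLOGV1 (== 0x00030001)
        # so ``fmt`` comes out as 1 and ``flags`` as 0x00030000.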
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            # There is a bug in the transaction handling when going from an
            # inline revlog to a separate index and data file. Turn it off
            # until it's fixed, since v2 revlogs sometimes get rewritten on
            # exchange.
            # See issue6485
            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self.nodemap_file
            and opts.get(b'devel-force-nodemap', False)
            and NodemapRevlogIO is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self.nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        elif fmt == REVLOGV2:
            self._io = revlogv2io()
        elif devel_nodemap:
            self._io = NodemapRevlogIO()
        elif use_rust_index:
            self._io = rustrevlogio()
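        # The order of this chain matters: the explicit v0/v2 format checks
        # win over the optional nodemap and Rust-backed parsers, which only
        # apply to v1 indexes.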
        try:
            d = self._io.parseindex(indexdata, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self.nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

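    # Note on the opener arguments used by _indexfp(): ``checkambig``
    # guards against file-stat ambiguity when the index is rewritten in
    # place, and ``atomictemp`` makes b'w' opens replace the file
    # atomically on close.
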
    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

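    # A sketch of the typical read pattern served by _datareadfp()
    # (``offset`` and ``length`` are hypothetical locals; real callers are
    # the chunk/segment reading helpers):
    #
    #     with self._datareadfp() as fp:
    #         fp.seek(offset)
    #         data = fp.read(length)
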
    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents), making the delta incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self.nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is responsible for validating the docket, so we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self.nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

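    # The first index field packs both values into a single integer:
    #
    #     packed = (offset << 16) | flags
    #
    # so, illustratively, a packed value of 0x12340001 decodes to
    # start() == 0x1234 and flags() == 0x0001.
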
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if self.version & 0xFFFF != REVLOGV2:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

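    # chainbase() follows the delta-base links (index field 3) until it
    # reaches a revision that is its own base, i.e. one stored as a full
    # snapshot rather than as a delta.
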
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

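    # Illustration: with general delta, the chain for revision 7 might be
    # ([2, 5, 7], False), since each delta may build on any earlier
    # revision; without general delta the chain is always a contiguous
    # run ending at the full-text base, e.g. ([4, 5, 6, 7], False).
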
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied,
        uses nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so no one is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

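    # Worked example: for a history with revisions 0-1-2 plus a branch
    # 1-3, every revision first marks itself, then clears its parents'
    # bits, so only the bits for 2 and 3 survive and [2, 3] is returned.
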
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

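    # The ``a > b`` early-out above is sound because a parent always has
    # a smaller revision number than its child, so an ancestor can never
    # carry a larger revision number than its descendant.
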
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

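    # _match() resolves exact identifiers only: an integer rev, a 20-byte
    # binary node, a decimal rev string such as b"42" (negative values
    # count back from the tip), or a full 40-digit hex node.  Prefix
    # lookups are handled by _partialmatch() below.
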
1489 def _partialmatch(self, id):
1478 def _partialmatch(self, id):
1490 # we don't care wdirfilenodeids as they should be always full hash
1479 # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
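        # Worked example (assuming the default cache size of 64KiB): a request
        # for offset=70000, length=1000 rounds down to realoffset=65536 and
        # yields reallength=65536, so the whole 64KiB window containing the
        # requested range is read and cached at once.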
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
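        # Field 0 of an index entry packs the data offset and the revision
        # flags as (offset << 16) | flags -- see offset_type() -- so shifting
        # right by 16 recovers the raw offset.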
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
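            # Inline revlogs interleave index entries and revision data in
            # the .i file: rev r's data is preceded by r + 1 index entries,
            # hence the (rev + 1) * entry-size adjustment above.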
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.version & 0xFFFF == REVLOGV2:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self._io.size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self._io.size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self._io.size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )
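        # Entry fields, in order: packed (offset << 16 | flags), compressed
        # length, uncompressed length, delta base rev, linkrev, p1 rev,
        # p2 rev, node, sidedata offset, sidedata length; pre-v2 revlogs
        # keep only the first eight (see below).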

        if self.version & 0xFFFF != REVLOGV2:
            e = e[:8]

        self.index.append(e)
        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
2444 if self.version & 0xFFFF != REVLOGV2:
2433 if self.version & 0xFFFF != REVLOGV2:
2445 return self.end(prev)
2434 return self.end(prev)
2446
2435
2447 offset = 0
2436 offset = 0
2448 for rev, entry in enumerate(self.index):
2437 for rev, entry in enumerate(self.index):
2449 sidedata_end = entry[8] + entry[9]
2438 sidedata_end = entry[8] + entry[9]
2450 # Sidedata for a previous rev has potentially been written after
2439 # Sidedata for a previous rev has potentially been written after
2451 # this rev's end, so take the max.
2440 # this rev's end, so take the max.
2452 offset = max(self.end(rev), offset, sidedata_end)
2441 offset = max(self.end(rev), offset, sidedata_end)
2453 return offset
2442 return offset
2454
2443
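A hedged illustration of the scan above: when sidedata can land past a later revision's data, the data file's true end is the maximum, over every revision, of both its data end and its sidedata end. The numbers below are invented:

entries = [
    # (data_end, sidedata_offset, sidedata_length), made-up values
    (100, 0, 0),     # rev 0: no sidedata
    (180, 220, 40),  # rev 1: sidedata rewritten after rev 2's data
    (220, 0, 0),     # rev 2: no sidedata
]
offset = 0
for data_end, sd_offset, sd_length in entries:
    offset = max(offset, data_end, sd_offset + sd_length)
assert offset == 260  # rev 1's sidedata, not rev 2's data, ends the file
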
    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

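The comment above is the reason every write path here reseeks to EOF first. A minimal standalone reproduction of the pattern, using only the standard library:

import os

with open("example.dat", "wb") as f:
    f.write(b"existing")

fh = open("example.dat", "a+b")
fh.seek(0)
fh.read(4)               # a read moves the handle's position...
fh.seek(0, os.SEEK_END)  # ...so reposition explicitly before appending
fh.write(b"+new")
fh.close()
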
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        Add a delta group.

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with this revlog
        and the revision number that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty

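The censored-base check in addgroup() accepts only a delta that replaces the base wholesale: one patch hunk covering bytes 0..oldlen, followed by the new text. A sketch of such a delta, mirroring what mdiff.replacediffheader() is compared against; the helper name here is local to the example:

import struct

def full_replacement_delta(oldtext, newtext):
    # one bdiff hunk: start=0, end=len(oldtext), length of replacement text
    return struct.pack(b">lll", 0, len(oldtext), len(newtext)) + newtext

old, new = b"secret data", b"tombstone"
delta = full_replacement_delta(old, new)
hlen = struct.calcsize(b">lll")
assert delta[:hlen] == struct.pack(b">lll", 0, len(old), len(new))
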
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

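A worked example of the truncation arithmetic above, assuming 64-byte v1 index entries and a strip point of rev 10 whose data starts at byte 5000 (both numbers invented):

entry_size = 64    # INDEX_ENTRY_V1.size
rev = 10           # first revision to remove
data_start = 5000  # hypothetical self.start(rev)

# separate data file: truncate data at rev's start, index at rev entries
datafile_end = data_start             # 5000
indexfile_end = rev * entry_size      # 640

# inline revlog: index entries and data share one file, so both add up
inline_index_end = data_start + rev * entry_size  # 5640
assert (datafile_end, indexfile_end, inline_index_end) == (5000, 640, 5640)
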
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

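For instance, with 64-byte entries a 200-byte index holds three whole entries plus 8 stray bytes, so the check above would report di == 8. A sketch with invented sizes:

s = 64        # index entry size
actual = 200  # bytes found on disk
i = max(0, actual // s)  # 3 complete entries
di = actual - (i * s)    # 8 trailing bytes that belong to no entry
assert (i, di) == (3, 8)
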
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the current default is used.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

        (srcrevlog, rev)

        and returns a quintet that controls changes to sidedata content from
        the old revision to the new clone result:

        (dropall, filterout, update, new_flags, dropped_flags)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        * `new_flags` is a bitfield of new flags that the revision should get
        * `dropped_flags` is a bitfield of flags that the revision should no
          longer have
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

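A hedged usage sketch of the API above, forcing every delta to be recomputed while copying (e.g. after a delta algorithm change); `src`, `dst`, and `tr` stand in for an existing revlog, an empty destination revlog, and an open transaction:

# recompute all deltas; slowest mode, but fully refreshes the storage
src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
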
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {}, 0, 0)
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall = sidedataactions[0]
                filterout = sidedataactions[1]
                update = sidedataactions[2]
                new_flags = sidedataactions[3]
                dropped_flags = sidedataactions[4]
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None

                flags |= new_flags
                flags &= ~dropped_flags

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

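A sketch of a ``sidedatacompanion`` matching the quintet contract described in clone()'s docstring; this hypothetical one drops a single sidedata key everywhere and leaves flags untouched:

def companion(srcrevlog, rev):
    dropall = False
    filterout = {b'example-key'}  # hypothetical sidedata key to drop
    update = {}                   # nothing new to add
    new_flags = 0                 # no flags gained
    dropped_flags = 0             # no flags removed
    return (dropall, filterout, update, new_flags, dropped_flags)
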
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

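The tombstone built above travels in the standard filelog metadata envelope. A local re-implementation of what storageutil.packmeta() produces, shown purely for illustration:

def packmeta(meta, text):
    # "\x01\n" delimits the metadata block; keys are sorted for stability
    metatext = b"".join(b"%s: %s\n" % (k, meta[k]) for k in sorted(meta))
    return b"\x01\n%s\x01\n%s" % (metatext, text)

tombstone = packmeta({b'censored': b'removed for legal reasons'}, b'')
assert tombstone == b"\x01\ncensored: removed for legal reasons\n\x01\n"
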
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, helpers, startrev, endrev):
        if self.version & 0xFFFF != REVLOGV2:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only operation).
        # See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._datafp(b'a+') as fp:
            # Maybe this bug still exists, see revlog._writeentry
            fp.seek(0, os.SEEK_END)
            current_offset = fp.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata = storageutil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"Rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)
                entry = entry[:8]
                entry += (current_offset, len(serialized_sidedata))

                fp.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

        # rewrite the new index entries
        with self._indexfp(b'w+') as fp:
            fp.seek(startrev * self._io.size)
            for i, entry in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, entry[8], entry[9])
                packed = self._io.packentry(entry, self.node, self.version, rev)
                fp.write(packed)
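An illustration of the index-entry rewrite above: a v2 entry is simply the eight v1 fields with (sidedata_offset, sidedata_length) appended; the values below are invented:

entry = (0, 120, 300, 0, 5, 4, -1, b"\x12" * 20)  # eight v1 fields
current_offset = 9000  # where the serialized sidedata was just written
sidedata_length = 48
entry = entry[:8] + (current_offset, sidedata_length)
assert entry[8:] == (9000, 48)
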
@@ -1,77 +1,90 @@
# revlogdeltas.py - constants used for revlog logic
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""

from __future__ import absolute_import

import struct

from ..interfaces import repository

### main revlog header

## revlog version
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
REVLOGV2 = 0xDEAD

## global revlog header flags
# Shared across v1 and v2.
FLAG_INLINE_DATA = 1 << 16
# Only used by v1, implied by v2.
FLAG_GENERALDELTA = 1 << 17
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = FLAG_INLINE_DATA

### individual entry

## index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")

## index v1
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
assert INDEX_ENTRY_V1.size == 32 * 2

# revlog index flags

# For historical reasons, revlog's internal flags were exposed via the
# wire protocol and are even exposed in parts of the storage APIs.

# revision has censor metadata, must be verified
REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
# revision hash does not match data (narrowhg)
REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
# revision data is stored externally
REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
# revision data contains extra metadata not part of the official digest
REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA
# revision changes files in a way that could affect copy tracing.
REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_SIDEDATA,
    REVIDX_HASCOPIESINFO,
]

# bitmask of flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = (
    REVIDX_ISCENSORED | REVIDX_EXTSTORED | REVIDX_SIDEDATA
)

SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
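To tie the new constant back to the index code: one v1 entry packs to exactly 64 bytes, and the node survives a round trip. A standalone demo with invented field values:

import struct

INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
node = b"\x12" * 20
offset, flags = 4096, 0
packed = INDEX_ENTRY_V1.pack(
    (offset << 16) | flags,  # 6-byte offset and 2-byte flags share the uint64
    120,  # compressed length
    300,  # uncompressed length
    0,    # base rev
    5,    # link rev
    4,    # parent 1 rev
    -1,   # parent 2 rev
    node, # 20-byte node, padded to the 32 bytes reserved for longer hashes
)
assert len(packed) == INDEX_ENTRY_V1.size == 64
assert INDEX_ENTRY_V1.unpack(packed)[7] == node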