revlog: deprecate direct `nodemap` access...

Author: marmoute
Changeset: r43974:02802fa8 (branch: default)
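The change renames the lazily-computed `nodemap` on the index classes to `_nodemap` and keeps the old public name as a property that emits a deprecation warning, steering callers toward the `has_node()`/`rev()`/`get_rev()` index API. The two hunks below touch `mercurial/pure/parsers.py` and `mercurial/revlog.py` (file names inferred from each hunk's leading header comment). A caller-side migration sketch, with a hypothetical helper and `rl` standing in for any loaded revlog instance (neither is part of this changeset):

def lookup(rl, node):
    # Membership test; replaces the deprecated `node in rl.nodemap`:
    if not rl.index.has_node(node):
        return None
    # Lookup; replaces the deprecated `rl.nodemap[node]`.  index.rev()
    # raises error.RevlogError for unknown nodes, while
    # rl.index.get_rev(node) would return None instead.
    return rl.index.rev(node)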
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -1,229 +1,235 @@
 # parsers.py - Python implementation of parsers.c
 #
 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
 import struct
 import zlib
 
 from ..node import nullid, nullrev
 from .. import (
     pycompat,
     revlogutils,
     util,
 )
 
 stringio = pycompat.bytesio
 
 
 _pack = struct.pack
 _unpack = struct.unpack
 _compress = zlib.compress
 _decompress = zlib.decompress
 
 # Some code below makes tuples directly because it's more convenient. However,
 # code outside this module should always use dirstatetuple.
 def dirstatetuple(*x):
     # x is a tuple
     return x
 
 
 indexformatng = b">Qiiiiii20s12x"
 indexfirst = struct.calcsize(b'Q')
 sizeint = struct.calcsize(b'i')
 indexsize = struct.calcsize(indexformatng)
 
 
 def gettype(q):
     return int(q & 0xFFFF)
 
 
 def offset_type(offset, type):
     return int(int(offset) << 16 | type)
 
 
 class BaseIndexObject(object):
+    @property
+    def nodemap(self):
+        msg = "index.nodemap is deprecated, " "use index.[has_node|rev|get_rev]"
+        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
+        return self._nodemap
+
     @util.propertycache
-    def nodemap(self):
+    def _nodemap(self):
         nodemap = revlogutils.NodeMap({nullid: nullrev})
         for r in range(0, len(self)):
             n = self[r][7]
             nodemap[n] = r
         return nodemap
 
     def has_node(self, node):
         """return True if the node exist in the index"""
-        return node in self.nodemap
+        return node in self._nodemap
 
     def rev(self, node):
         """return a revision for a node
 
         If the node is unknown, raise a RevlogError"""
-        return self.nodemap[node]
+        return self._nodemap[node]
 
     def get_rev(self, node):
         """return a revision for a node
 
         If the node is unknown, return None"""
-        return self.nodemap.get(node)
+        return self._nodemap.get(node)
 
     def _stripnodes(self, start):
-        if 'nodemap' in vars(self):
+        if '_nodemap' in vars(self):
             for r in range(start, len(self)):
                 n = self[r][7]
-                del self.nodemap[n]
+                del self._nodemap[n]
 
     def clearcaches(self):
-        self.__dict__.pop('nodemap', None)
+        self.__dict__.pop('_nodemap', None)
 
     def __len__(self):
         return self._lgt + len(self._extra)
 
     def append(self, tup):
-        if 'nodemap' in vars(self):
-            self.nodemap[tup[7]] = len(self)
+        if '_nodemap' in vars(self):
+            self._nodemap[tup[7]] = len(self)
         self._extra.append(tup)
 
     def _check_index(self, i):
         if not isinstance(i, int):
             raise TypeError(b"expecting int indexes")
         if i < 0 or i >= len(self):
             raise IndexError
 
     def __getitem__(self, i):
         if i == -1:
             return (0, 0, 0, -1, -1, -1, -1, nullid)
         self._check_index(i)
         if i >= self._lgt:
             return self._extra[i - self._lgt]
         index = self._calculate_index(i)
         r = struct.unpack(indexformatng, self._data[index : index + indexsize])
         if i == 0:
             e = list(r)
             type = gettype(e[0])
             e[0] = offset_type(0, type)
             return tuple(e)
         return r
 
 
 class IndexObject(BaseIndexObject):
     def __init__(self, data):
         assert len(data) % indexsize == 0
         self._data = data
         self._lgt = len(data) // indexsize
         self._extra = []
 
     def _calculate_index(self, i):
         return i * indexsize
 
     def __delitem__(self, i):
         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
             raise ValueError(b"deleting slices only supports a:-1 with step 1")
         i = i.start
         self._check_index(i)
         self._stripnodes(i)
         if i < self._lgt:
             self._data = self._data[: i * indexsize]
             self._lgt = i
             self._extra = []
         else:
             self._extra = self._extra[: i - self._lgt]
 
 
 class InlinedIndexObject(BaseIndexObject):
     def __init__(self, data, inline=0):
         self._data = data
         self._lgt = self._inline_scan(None)
         self._inline_scan(self._lgt)
         self._extra = []
 
     def _inline_scan(self, lgt):
         off = 0
         if lgt is not None:
             self._offsets = [0] * lgt
         count = 0
         while off <= len(self._data) - indexsize:
             (s,) = struct.unpack(
                 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
             )
             if lgt is not None:
                 self._offsets[count] = off
             count += 1
             off += indexsize + s
         if off != len(self._data):
             raise ValueError(b"corrupted data")
         return count
 
     def __delitem__(self, i):
         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
             raise ValueError(b"deleting slices only supports a:-1 with step 1")
         i = i.start
         self._check_index(i)
         self._stripnodes(i)
         if i < self._lgt:
             self._offsets = self._offsets[:i]
             self._lgt = i
             self._extra = []
         else:
             self._extra = self._extra[: i - self._lgt]
 
     def _calculate_index(self, i):
         return self._offsets[i]
 
 
 def parse_index2(data, inline):
     if not inline:
         return IndexObject(data), None
     return InlinedIndexObject(data, inline), (0, data)
 
 
 def parse_dirstate(dmap, copymap, st):
     parents = [st[:20], st[20:40]]
     # dereference fields so they will be local in loop
     format = b">cllll"
     e_size = struct.calcsize(format)
     pos1 = 40
     l = len(st)
 
     # the inner loop
     while pos1 < l:
         pos2 = pos1 + e_size
         e = _unpack(b">cllll", st[pos1:pos2])  # a literal here is faster
         pos1 = pos2 + e[4]
         f = st[pos2:pos1]
         if b'\0' in f:
             f, c = f.split(b'\0')
             copymap[f] = c
         dmap[f] = e[:4]
     return parents
 
 
 def pack_dirstate(dmap, copymap, pl, now):
     now = int(now)
     cs = stringio()
     write = cs.write
     write(b"".join(pl))
     for f, e in pycompat.iteritems(dmap):
         if e[0] == b'n' and e[3] == now:
             # The file was last modified "simultaneously" with the current
             # write to dirstate (i.e. within the same second for file-
             # systems with a granularity of 1 sec). This commonly happens
             # for at least a couple of files on 'update'.
             # The user could change the file without changing its size
             # within the same second. Invalidate the file's mtime in
             # dirstate, forcing future 'status' calls to compare the
             # contents of the file if the size is the same. This prevents
             # mistakenly treating such files as clean.
             e = dirstatetuple(e[0], e[1], e[2], -1)
             dmap[f] = e
 
         if f in copymap:
             f = b"%s\0%s" % (f, copymap[f])
         e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
         write(e)
         write(f)
     return cs.getvalue()
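Both files apply the same shim pattern: the `util.propertycache` moves to the private `_nodemap` name, and a plain `@property` under the old name calls `util.nouideprecwarn` before forwarding. A self-contained sketch of that pattern, using the standard-library `warnings` module in place of Mercurial's `util.nouideprecwarn` (class and attribute names here are illustrative, not from the changeset):

import warnings


class ExampleIndex(object):
    """Index with a deprecated public `nodemap` and a cached private one."""

    def __init__(self, nodes):
        self._nodes = list(nodes)

    @property
    def nodemap(self):
        # Old public name: warn at the caller's location, then forward.
        warnings.warn(
            "nodemap is deprecated, use has_node/rev/get_rev",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._nodemap

    @property
    def _nodemap(self):
        # Stand-in for @util.propertycache: build the node->rev mapping on
        # first access and reuse the cached dict afterwards.
        cached = self.__dict__.get('_nodemap_cache')
        if cached is None:
            cached = {n: r for r, n in enumerate(self._nodes)}
            self.__dict__['_nodemap_cache'] = cached
        return cached

    def has_node(self, node):
        return node in self._nodemap

    def rev(self, node):
        return self._nodemap[node]  # KeyError if the node is unknown

    def get_rev(self, node):
        return self._nodemap.get(node)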
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,2972 +1,2982 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 from __future__ import absolute_import
 
 import collections
 import contextlib
 import errno
 import io
 import os
 import struct
 import zlib
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullhex,
     nullid,
     nullrev,
     short,
     wdirfilenodeids,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from .pycompat import getattr
 from .revlogutils.constants import (
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
     REVIDX_SIDEDATA,
 )
 from .thirdparty import attr
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     revlogutils,
     templatefilters,
     util,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     deltas as deltautil,
     flagutil,
     sidedata as sidedatautil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )
 
 # blanked usage of all the name to prevent pyflakes constraints
 # We need these name available in the module for extensions.
 REVLOGV0
 REVLOGV1
 REVLOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_SIDEDATA
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS
 
 parsers = policy.importmod('parsers')
 rustancestor = policy.importrust('ancestor')
 rustdagop = policy.importrust('dagop')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576
 
 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False, {}
 
 
 def ellipsiswriteprocessor(rl, text, sidedata):
     return text, False
 
 
 def ellipsisrawprocessor(rl, text):
     return False
 
 
 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )
 
 
 def getoffset(q):
     return int(q >> 16)
 
 
 def gettype(q):
     return int(q & 0xFFFF)
 
 
 def offset_type(offset, type):
     if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError(b'unknown revlog index flags')
     return int(int(offset) << 16 | type)
 
 
 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
     node: expected hash of the revision
     p1, p2: parent revs of the revision
     btext: built text cache consisting of a one-element list
     cachedelta: (baserev, uncompressed_delta) or None
     flags: flags associated to the revision storage
 
     One of btext[0] or cachedelta must be set.
     """
 
     node = attr.ib()
     p1 = attr.ib()
     p2 = attr.ib()
     btext = attr.ib()
     textlen = attr.ib()
     cachedelta = attr.ib()
     flags = attr.ib()
 
 
 @interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
 class revlogrevisiondelta(object):
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     linknode = attr.ib(default=None)
 
 
 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
 class revlogproblem(object):
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)
 
 
 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(b">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack
 
 
 class revlogoldindex(list):
+    @property
+    def nodemap(self):
+        msg = "index.nodemap is deprecated, " "use index.[has_node|rev|get_rev]"
+        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
+        return self._nodemap
+
     @util.propertycache
-    def nodemap(self):
+    def _nodemap(self):
         nodemap = revlogutils.NodeMap({nullid: nullrev})
         for r in range(0, len(self)):
             n = self[r][7]
             nodemap[n] = r
         return nodemap
 
     def has_node(self, node):
         """return True if the node exist in the index"""
-        return node in self.nodemap
+        return node in self._nodemap
 
     def rev(self, node):
         """return a revision for a node
 
         If the node is unknown, raise a RevlogError"""
-        return self.nodemap[node]
+        return self._nodemap[node]
 
     def get_rev(self, node):
         """return a revision for a node
 
         If the node is unknown, return None"""
-        return self.nodemap.get(node)
+        return self._nodemap.get(node)
 
     def append(self, tup):
-        self.nodemap[tup[7]] = len(self)
+        self._nodemap[tup[7]] = len(self)
         super(revlogoldindex, self).append(tup)
 
     def __delitem__(self, i):
         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
             raise ValueError(b"deleting slices only supports a:-1 with step 1")
         for r in pycompat.xrange(i.start, len(self)):
-            del self.nodemap[self[r][7]]
+            del self._nodemap[self[r][7]]
         super(revlogoldindex, self).__delitem__(i)
 
     def clearcaches(self):
-        self.__dict__.pop('nodemap', None)
+        self.__dict__.pop('_nodemap', None)
 
     def __getitem__(self, i):
         if i == -1:
             return (0, 0, 0, -1, -1, -1, -1, nullid)
         return list.__getitem__(self, i)
 
 
 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size
 
     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = revlogutils.NodeMap({nullid: nullrev})
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off : off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (
                 offset_type(e[0], 0),
                 e[1],
                 -1,
                 e[2],
                 e[3],
                 nodemap.get(e[4], nullrev),
                 nodemap.get(e[5], nullrev),
                 e[6],
             )
             index.append(e2)
             nodemap[e[6]] = n
             n += 1
 
         index = revlogoldindex(index)
         return index, None
 
     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise error.RevlogError(
                 _(b'index entry flags need revlog version 1')
             )
         e2 = (
             getoffset(entry[0]),
             entry[1],
             entry[3],
             entry[4],
             node(entry[5]),
             node(entry[6]),
             entry[7],
         )
         return indexformatv0_pack(*e2)
 
 
 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(b">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(b">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7FFFFFFF
 
 
 class revlogio(object):
     def __init__(self):
         self.size = indexformatng.size
 
     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
         return index, cache
 
     def packentry(self, entry, node, version, rev):
         p = indexformatng_pack(*entry)
         if rev == 0:
             p = versionformat_pack(version) + p[4:]
         return p
 
 
 class revlog(object):
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
 
     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.
 
     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.
 
     If censorable is True, the revlog can have censored revisions.
 
     If `upperboundcomp` is not None, this is the expected maximal gain from
     compression for the data content.
     """
 
     _flagserrorclass = error.RevlogError
 
     def __init__(
         self,
         opener,
         indexfile,
         datafile=None,
         checkambig=False,
         mmaplargeindex=False,
         censorable=False,
         upperboundcomp=None,
     ):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
 
         """
         self.upperboundcomp = upperboundcomp
         self.indexfile = indexfile
         self.datafile = datafile or (indexfile[:-2] + b".d")
         self.opener = opener
         # When True, indexfile is opened with checkambig=True at writing, to
         # avoid file stat ambiguity.
         self._checkambig = checkambig
         self._mmaplargeindex = mmaplargeindex
         self._censorable = censorable
         # 3-tuple of (node, rev, text) for a raw revision.
         self._revisioncache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, b'')
         # How much data to read and cache into the raw revlog data cache.
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._deltabothparents = True
         self.index = None
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
         # Mapping of revision integer to full node.
         self._nodepos = None
         self._compengine = b'zlib'
         self._compengineopts = {}
         self._maxdeltachainspan = -1
         self._withsparseread = False
         self._sparserevlog = False
         self._srdensitythreshold = 0.50
         self._srmingapsize = 262144
 
         # Make copy of flag processors so each revlog instance can support
         # custom flags.
         self._flagprocessors = dict(flagutil.flagprocessors)
 
         # 2-tuple of file handles being used for active writing.
         self._writinghandles = None
 
         self._loadindex()
 
     def _loadindex(self):
         mmapindexthreshold = None
         opts = self.opener.options
 
         if b'revlogv2' in opts:
             newversionflags = REVLOGV2 | FLAG_INLINE_DATA
         elif b'revlogv1' in opts:
             newversionflags = REVLOGV1 | FLAG_INLINE_DATA
             if b'generaldelta' in opts:
                 newversionflags |= FLAG_GENERALDELTA
         elif b'revlogv0' in self.opener.options:
             newversionflags = REVLOGV0
         else:
             newversionflags = REVLOG_DEFAULT_VERSION
 
         if b'chunkcachesize' in opts:
             self._chunkcachesize = opts[b'chunkcachesize']
         if b'maxchainlen' in opts:
             self._maxchainlen = opts[b'maxchainlen']
         if b'deltabothparents' in opts:
             self._deltabothparents = opts[b'deltabothparents']
         self._lazydelta = bool(opts.get(b'lazydelta', True))
         self._lazydeltabase = False
         if self._lazydelta:
             self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
         if b'compengine' in opts:
             self._compengine = opts[b'compengine']
         if b'zlib.level' in opts:
             self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
         if b'zstd.level' in opts:
             self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
         if b'maxdeltachainspan' in opts:
             self._maxdeltachainspan = opts[b'maxdeltachainspan']
         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
             mmapindexthreshold = opts[b'mmapindexthreshold']
         self.hassidedata = bool(opts.get(b'side-data', False))
         if self.hassidedata:
             self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
         self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
         withsparseread = bool(opts.get(b'with-sparse-read', False))
         # sparse-revlog forces sparse-read
         self._withsparseread = self._sparserevlog or withsparseread
         if b'sparse-read-density-threshold' in opts:
             self._srdensitythreshold = opts[b'sparse-read-density-threshold']
         if b'sparse-read-min-gap-size' in opts:
             self._srmingapsize = opts[b'sparse-read-min-gap-size']
         if opts.get(b'enableellipsis'):
             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
 
         # revlog v0 doesn't have flag processors
         for flag, processor in pycompat.iteritems(
             opts.get(b'flagprocessors', {})
         ):
             flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
 
         if self._chunkcachesize <= 0:
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not greater than 0')
                 % self._chunkcachesize
             )
         elif self._chunkcachesize & (self._chunkcachesize - 1):
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not a power of 2')
                 % self._chunkcachesize
             )
 
         indexdata = b''
         self._initempty = True
         try:
             with self._indexfp() as f:
                 if (
                     mmapindexthreshold is not None
                     and self.opener.fstat(f).st_size >= mmapindexthreshold
                 ):
                     # TODO: should .close() to release resources without
                     # relying on Python GC
                     indexdata = util.buffer(util.mmapread(f))
                 else:
                     indexdata = f.read()
             if len(indexdata) > 0:
                 versionflags = versionformat_unpack(indexdata[:4])[0]
                 self._initempty = False
             else:
                 versionflags = newversionflags
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise
 
             versionflags = newversionflags
 
         self.version = versionflags
 
         flags = versionflags & ~0xFFFF
         fmt = versionflags & 0xFFFF
 
         if fmt == REVLOGV0:
             if flags:
                 raise error.RevlogError(
                     _(b'unknown flags (%#04x) in version %d revlog %s')
                     % (flags >> 16, fmt, self.indexfile)
                 )
 
             self._inline = False
             self._generaldelta = False
 
         elif fmt == REVLOGV1:
             if flags & ~REVLOGV1_FLAGS:
                 raise error.RevlogError(
                     _(b'unknown flags (%#04x) in version %d revlog %s')
                     % (flags >> 16, fmt, self.indexfile)
                 )
 
             self._inline = versionflags & FLAG_INLINE_DATA
             self._generaldelta = versionflags & FLAG_GENERALDELTA
 
         elif fmt == REVLOGV2:
             if flags & ~REVLOGV2_FLAGS:
                 raise error.RevlogError(
                     _(b'unknown flags (%#04x) in version %d revlog %s')
                     % (flags >> 16, fmt, self.indexfile)
                 )
 
             self._inline = versionflags & FLAG_INLINE_DATA
             # generaldelta implied by version 2 revlogs.
             self._generaldelta = True
 
         else:
             raise error.RevlogError(
                 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
             )
         # sparse-revlog can't be on without general-delta (issue6056)
         if not self._generaldelta:
             self._sparserevlog = False
 
         self._storedeltachains = True
 
         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         try:
             d = self._io.parseindex(indexdata, self._inline)
         except (ValueError, IndexError):
             raise error.RevlogError(
                 _(b"index %s is corrupted") % self.indexfile
             )
         self.index, self._chunkcache = d
-        self.nodemap = self.index.nodemap
         if not self._chunkcache:
             self._chunkclear()
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = {}
         # revlog header -> revlog compressor
         self._decompressors = {}
 
     @util.propertycache
     def _compressor(self):
         engine = util.compengines[self._compengine]
         return engine.revlogcompressor(self._compengineopts)
 
     def _indexfp(self, mode=b'r'):
         """file object for the revlog's index file"""
         args = {'mode': mode}
         if mode != b'r':
             args['checkambig'] = self._checkambig
         if mode == b'w':
             args['atomictemp'] = True
         return self.opener(self.indexfile, **args)
 
     def _datafp(self, mode=b'r'):
         """file object for the revlog's data file"""
         return self.opener(self.datafile, mode=mode)
 
     @contextlib.contextmanager
     def _datareadfp(self, existingfp=None):
         """file object suitable to read data"""
         # Use explicit file handle, if given.
         if existingfp is not None:
             yield existingfp
 
         # Use a file handle being actively used for writes, if available.
         # There is some danger to doing this because reads will seek the
         # file. However, _writeentry() performs a SEEK_END before all writes,
         # so we should be safe.
         elif self._writinghandles:
             if self._inline:
                 yield self._writinghandles[0]
             else:
                 yield self._writinghandles[1]
 
         # Otherwise open a new file handle.
         else:
             if self._inline:
                 func = self._indexfp
             else:
                 func = self._datafp
             with func() as fp:
                 yield fp
 
     def tiprev(self):
         return len(self.index) - 1
 
     def tip(self):
         return self.node(self.tiprev())
 
     def __contains__(self, rev):
         return 0 <= rev < len(self)
 
     def __len__(self):
         return len(self.index)
 
     def __iter__(self):
         return iter(pycompat.xrange(len(self)))
 
     def revs(self, start=0, stop=None):
         """iterate over all rev in this revlog (from start to stop)"""
         return storageutil.iterrevs(len(self), start=start, stop=stop)
 
-    @util.propertycache
+    @property
     def nodemap(self):
+        msg = (
+            "revlog.nodemap is deprecated, "
+            "use revlog.index.[has_node|rev|get_rev]"
+        )
+        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
         return self.index.nodemap
 
657 @property
667 @property
658 def _nodecache(self):
668 def _nodecache(self):
659 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
669 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
660 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
670 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
661 return self.index.nodemap
671 return self.index.nodemap
662
672
663 def hasnode(self, node):
673 def hasnode(self, node):
664 try:
674 try:
665 self.rev(node)
675 self.rev(node)
666 return True
676 return True
667 except KeyError:
677 except KeyError:
668 return False
678 return False
669
679
670 def candelta(self, baserev, rev):
680 def candelta(self, baserev, rev):
671 """whether two revisions (baserev, rev) can be delta-ed or not"""
681 """whether two revisions (baserev, rev) can be delta-ed or not"""
672 # Disable delta if either rev requires a content-changing flag
682 # Disable delta if either rev requires a content-changing flag
673 # processor (ex. LFS). This is because such flag processor can alter
683 # processor (ex. LFS). This is because such flag processor can alter
674 # the rawtext content that the delta will be based on, and two clients
684 # the rawtext content that the delta will be based on, and two clients
675 # could have a same revlog node with different flags (i.e. different
685 # could have a same revlog node with different flags (i.e. different
676 # rawtext contents) and the delta could be incompatible.
686 # rawtext contents) and the delta could be incompatible.
677 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
687 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
678 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
688 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
679 ):
689 ):
680 return False
690 return False
681 return True
691 return True
682
692
683 def clearcaches(self):
693 def clearcaches(self):
684 self._revisioncache = None
694 self._revisioncache = None
685 self._chainbasecache.clear()
695 self._chainbasecache.clear()
686 self._chunkcache = (0, b'')
696 self._chunkcache = (0, b'')
687 self._pcache = {}
697 self._pcache = {}
688 self.index.clearcaches()
698 self.index.clearcaches()
689
699
690 def rev(self, node):
700 def rev(self, node):
691 try:
701 try:
692 return self.index.rev(node)
702 return self.index.rev(node)
693 except TypeError:
703 except TypeError:
694 raise
704 raise
695 except error.RevlogError:
705 except error.RevlogError:
696 # parsers.c radix tree lookup failed
706 # parsers.c radix tree lookup failed
697 if node == wdirid or node in wdirfilenodeids:
707 if node == wdirid or node in wdirfilenodeids:
698 raise error.WdirUnsupported
708 raise error.WdirUnsupported
699 raise error.LookupError(node, self.indexfile, _(b'no node'))
709 raise error.LookupError(node, self.indexfile, _(b'no node'))
700
710
701 # Accessors for index entries.
711 # Accessors for index entries.
702
712
703 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
713 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
704 # are flags.
714 # are flags.
705 def start(self, rev):
715 def start(self, rev):
706 return int(self.index[rev][0] >> 16)
716 return int(self.index[rev][0] >> 16)
707
717
708 def flags(self, rev):
718 def flags(self, rev):
709 return self.index[rev][0] & 0xFFFF
719 return self.index[rev][0] & 0xFFFF
710
720
711 def length(self, rev):
721 def length(self, rev):
712 return self.index[rev][1]
722 return self.index[rev][1]
713
723
714 def rawsize(self, rev):
724 def rawsize(self, rev):
715 """return the length of the uncompressed text for a given revision"""
725 """return the length of the uncompressed text for a given revision"""
716 l = self.index[rev][2]
726 l = self.index[rev][2]
717 if l >= 0:
727 if l >= 0:
718 return l
728 return l
719
729
720 t = self.rawdata(rev)
730 t = self.rawdata(rev)
721 return len(t)
731 return len(t)
722
732
723 def size(self, rev):
733 def size(self, rev):
724 """length of non-raw text (processed by a "read" flag processor)"""
734 """length of non-raw text (processed by a "read" flag processor)"""
725 # fast path: if no "read" flag processor could change the content,
735 # fast path: if no "read" flag processor could change the content,
726 # size is rawsize. note: ELLIPSIS is known to not change the content.
736 # size is rawsize. note: ELLIPSIS is known to not change the content.
727 flags = self.flags(rev)
737 flags = self.flags(rev)
728 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
738 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
729 return self.rawsize(rev)
739 return self.rawsize(rev)
730
740
731 return len(self.revision(rev, raw=False))
741 return len(self.revision(rev, raw=False))
732
742
733 def chainbase(self, rev):
743 def chainbase(self, rev):
734 base = self._chainbasecache.get(rev)
744 base = self._chainbasecache.get(rev)
735 if base is not None:
745 if base is not None:
736 return base
746 return base
737
747
738 index = self.index
748 index = self.index
739 iterrev = rev
749 iterrev = rev
740 base = index[iterrev][3]
750 base = index[iterrev][3]
741 while base != iterrev:
751 while base != iterrev:
742 iterrev = base
752 iterrev = base
743 base = index[iterrev][3]
753 base = index[iterrev][3]
744
754
745 self._chainbasecache[rev] = base
755 self._chainbasecache[rev] = base
746 return base
756 return base
747
757
748 def linkrev(self, rev):
758 def linkrev(self, rev):
749 return self.index[rev][4]
759 return self.index[rev][4]
750
760
751 def parentrevs(self, rev):
761 def parentrevs(self, rev):
752 try:
762 try:
753 entry = self.index[rev]
763 entry = self.index[rev]
754 except IndexError:
764 except IndexError:
755 if rev == wdirrev:
765 if rev == wdirrev:
756 raise error.WdirUnsupported
766 raise error.WdirUnsupported
757 raise
767 raise
758
768
759 return entry[5], entry[6]
769 return entry[5], entry[6]
760
770
761 # fast parentrevs(rev) where rev isn't filtered
771 # fast parentrevs(rev) where rev isn't filtered
762 _uncheckedparentrevs = parentrevs
772 _uncheckedparentrevs = parentrevs
763
773
764 def node(self, rev):
774 def node(self, rev):
765 try:
775 try:
766 return self.index[rev][7]
776 return self.index[rev][7]
767 except IndexError:
777 except IndexError:
768 if rev == wdirrev:
778 if rev == wdirrev:
769 raise error.WdirUnsupported
779 raise error.WdirUnsupported
770 raise
780 raise
771
781
772 # Derived from index values.
782 # Derived from index values.
773
783
    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

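    # For a hypothetical general-delta revlog where rev 5 deltas against
    # rev 4, which deltas against a snapshot at rev 2, the contract above
    # works out to:
    #   _deltachain(5)            -> ([2, 4, 5], False)
    #   _deltachain(5, stoprev=4) -> ([5], True)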
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, b'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied,
        uses nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
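        # The array below has one extra slot so that a nullrev (-1) parent
        # marks the trailing sentinel via negative indexing rather than
        # clobbering a real revision.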
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

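    # Note that children() has no index acceleration: it linearly scans
    # every revision after the parent, so each call is O(len(revlog)).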
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
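        # Revlogs are append-only, so a parent always has a smaller revision
        # number than its children; a > b therefore rules out ancestry before
        # reachableroots is consulted.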
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

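    # _match() tries the cheap interpretations in order: an int (a revision
    # number), a 20-byte string (a binary nodeid), a decimal bytestring
    # (str(rev), negatives allowed), then a 40-character hex nodeid; anything
    # else falls through to None for _partialmatch() below to handle.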
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

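    # The chunk cache managed below is a single (offset, data) window over
    # the raw revlog file: _cachesegment() extends the window when reads are
    # contiguous and replaces it otherwise.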
    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be repositioned and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
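        # The bitmask arithmetic below rounds the window to multiples of the
        # cache size and therefore assumes _chunkcachesize is a power of two
        # (revlog rejects other values when the option is configured).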
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

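        # For inline revlogs the data is interleaved with the index, so
        # revision r's data sits behind r + 1 index entries; shift both
        # boundaries by that many entry sizes.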
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

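    # Under sparse-revlog, a snapshot is a revision stored either as a full
    # text (its base is nullrev or itself) or as a delta against another
    # snapshot rather than against one of its parents; issnapshot() below
    # recurses on the base to decide.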
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

1689 def snapshotdepth(self, rev):
1699 def snapshotdepth(self, rev):
1690 """number of snapshot in the chain before this one"""
1700 """number of snapshot in the chain before this one"""
1691 if not self.issnapshot(rev):
1701 if not self.issnapshot(rev):
1692 raise error.ProgrammingError(b'revision %d not a snapshot')
1702 raise error.ProgrammingError(b'revision %d not a snapshot')
1693 return len(self._deltachain(rev)[0]) - 1
1703 return len(self._deltachain(rev)[0]) - 1
1694
1704
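    # Usage sketch (illustrative only): distinguishing snapshots from plain
    # deltas. A snapshot either stands alone or deltas against another
    # snapshot, so its depth counts the snapshots stacked beneath it.
    #
    #     if rl.issnapshot(rev):
    #         depth = rl.snapshotdepth(rev)   # 0 for a full-text revision
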
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

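    # Usage sketch (illustrative only): when ``rev2`` is already stored as a
    # delta against ``rev1``, revdiff() returns the stored chunk verbatim and
    # skips the mdiff computation entirely.
    #
    #     delta = rl.revdiff(rev - 1, rev)   # binary delta over raw data
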
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) is deprecated, use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

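    # Usage sketch (illustrative only), following the deprecation above:
    #
    #     text = rl.revision(node)   # flag processors applied to the text
    #     blob = rl.rawdata(node)    # the raw stored form, transforms skipped
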
    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

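    # Conceptual sketch (an assumption about what
    # storageutil.hashrevisionsha1 computes; see that module for the
    # authoritative version): the node is the SHA-1 of the two parent
    # nodes in sorted order followed by the revision text.
    #
    #     import hashlib
    #     def _sketch_hash(text, p1, p2):
    #         a, b = sorted((p1, p2))
    #         s = hashlib.sha1(a)
    #         s.update(b)
    #         s.update(text)
    #         return s.digest()
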
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if self.index.has_node(node):
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

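    # Round-trip sketch (illustrative only): compress() returns a
    # (header, data) pair whose concatenation is what gets stored, and
    # decompress() routes on the first byte: b'x' for zlib, b'u' for an
    # uncompressed chunk behind a one-byte header, b'\0' for raw data
    # stored as-is, anything else for a pluggable engine header.
    #
    #     header, packed = rl.compress(chunk)
    #     assert bytes(rl.decompress(header + packed)) == chunk
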
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size before it is changed by flag processors;
            # this is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if self.index.has_node(node):
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()

        return nodes

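    # Usage sketch (illustrative only): each entry consumed by addgroup() is
    # a 7-tuple as unpacked above, and ``linkmapper`` translates a changelog
    # node into the linkrev to store. The names below are hypothetical.
    #
    #     deltas = [(node, p1, p2, linknode, deltabase, delta, flags)]
    #     added = rl.addgroup(deltas, cl.rev, tr)   # cl.rev as a linkmapper
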
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

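    # Usage sketch (illustrative only): find where to truncate before
    # stripping everything whose linkrev is >= minlink.
    #
    #     rev, broken = rl.getstrippoint(minlink)
    #     # ``broken`` holds the revs whose linkrevs this strip would
    #     # invalidate; the caller must save and re-add them.
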
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

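    # Usage sketch (illustrative only): interpreting checksize() output.
    # Nonzero dd/di mean trailing bytes in the data/index file beyond what
    # the index accounts for, e.g. from an interrupted transaction.
    #
    #     dd, di = rl.checksize()
    #     healthy = (dd, di) == (0, 0)
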
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

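    # Usage sketch (illustrative only): cloning into a fresh revlog while
    # recomputing every delta, as clone() below documents. ``destrevlog``
    # must be empty and both revlogs unfiltered.
    #
    #     rl.clone(tr, destrevlog, deltareuse=rl.DELTAREUSENEVER)
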
2552 def clone(
2562 def clone(
2553 self,
2563 self,
2554 tr,
2564 tr,
2555 destrevlog,
2565 destrevlog,
2556 addrevisioncb=None,
2566 addrevisioncb=None,
2557 deltareuse=DELTAREUSESAMEREVS,
2567 deltareuse=DELTAREUSESAMEREVS,
2558 forcedeltabothparents=None,
2568 forcedeltabothparents=None,
2559 sidedatacompanion=None,
2569 sidedatacompanion=None,
2560 ):
2570 ):
2561 """Copy this revlog to another, possibly with format changes.
2571 """Copy this revlog to another, possibly with format changes.
2562
2572
        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        that when converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a parent
        of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are force-computed against both
        parents for merges. When unset, the destination revlog's existing
        setting is kept.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result (see the sketch just below
        this docstring):

            (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
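        # A minimal ``sidedatacompanion`` sketch (illustrative only; the key
        # to filter out is hypothetical): drop one sidedata key from every
        # revision and add nothing new.
        #
        #   def companion(srcrevlog, rev):
        #       dropall = False
        #       filterout = {some_sidedata_key}  # hypothetical key to drop
        #       update = {}
        #       return (dropall, filterout, update)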
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

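    # Illustrative use of ``clone`` (``src``, ``dest`` and ``tr`` are
    # hypothetical objects): recompute every delta while copying, e.g. after
    # a change to the delta algorithm.
    #
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
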
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

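    # Illustrative ``addrevisioncb`` sketch (``progress``, ``src`` and
    # ``dest`` are hypothetical): the callback is invoked once per copied
    # revision with (source revlog, rev, node).
    #
    #   def onrevision(srcrevlog, rev, node):
    #       progress.increment()
    #
    #   src.clone(tr, dest, addrevisioncb=onrevision)
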
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
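        # The packed tombstone uses the filelog metadata envelope; e.g.
        # packmeta({b'censored': b'why'}, b'') yields raw text of the form
        # b'\x01\ncensored: why\n\x01\n' (the exact byte layout here is an
        # assumption for illustration).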

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

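    # Illustrative use (``rl``, ``tr`` and ``node`` are hypothetical): inside
    # an open transaction, replace the content of ``node`` with a tombstone.
    #
    #   rl.censorrevision(tr, node, tombstone=b'removed for legal reasons')
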
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
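        # Illustrative consumption sketch (``rl`` and ``ui`` are
        # hypothetical): report every hard error the verifier yields.
        #
        #   for problem in rl.verifyintegrity(state):
        #       if problem.error:
        #           ui.warn(problem.error + b'\n')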
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state[b'skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
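    # Illustrative call (``rl`` is a hypothetical revlog instance): request
    # only the cheap counters. ``trackedsize`` sums the raw size of every
    # revision, while ``storedsize`` measures the bytes actually on disk.
    #
    #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # e.g. {b'revisionscount': 42, b'trackedsize': 12345}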