##// END OF EJS Templates
revlogutils: move the NodeMap class in a dedicated nodemap module...
marmoute -
r44486:ab595920 default
parent child Browse files
Show More
@@ -0,0 +1,15 b''
1 # nodemap.py - nodemap related code and utilities
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 #
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
8
9 from __future__ import absolute_import
10 from .. import error
11
12
13 class NodeMap(dict):
14 def __missing__(self, x):
15 raise error.RevlogError(b'unknown node: %s' % x)
@@ -1,235 +1,236 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from ..node import nullid, nullrev
13 from ..node import nullid, nullrev
14 from .. import (
14 from .. import (
15 pycompat,
15 pycompat,
16 revlogutils,
17 util,
16 util,
18 )
17 )
19
18
19 from ..revlogutils import nodemap as nodemaputil
20
20 stringio = pycompat.bytesio
21 stringio = pycompat.bytesio
21
22
22
23
23 _pack = struct.pack
24 _pack = struct.pack
24 _unpack = struct.unpack
25 _unpack = struct.unpack
25 _compress = zlib.compress
26 _compress = zlib.compress
26 _decompress = zlib.decompress
27 _decompress = zlib.decompress
27
28
28 # Some code below makes tuples directly because it's more convenient. However,
29 # Some code below makes tuples directly because it's more convenient. However,
29 # code outside this module should always use dirstatetuple.
30 # code outside this module should always use dirstatetuple.
30 def dirstatetuple(*x):
31 def dirstatetuple(*x):
31 # x is a tuple
32 # x is a tuple
32 return x
33 return x
33
34
34
35
35 indexformatng = b">Qiiiiii20s12x"
36 indexformatng = b">Qiiiiii20s12x"
36 indexfirst = struct.calcsize(b'Q')
37 indexfirst = struct.calcsize(b'Q')
37 sizeint = struct.calcsize(b'i')
38 sizeint = struct.calcsize(b'i')
38 indexsize = struct.calcsize(indexformatng)
39 indexsize = struct.calcsize(indexformatng)
39
40
40
41
41 def gettype(q):
42 def gettype(q):
42 return int(q & 0xFFFF)
43 return int(q & 0xFFFF)
43
44
44
45
45 def offset_type(offset, type):
46 def offset_type(offset, type):
46 return int(int(offset) << 16 | type)
47 return int(int(offset) << 16 | type)
47
48
48
49
49 class BaseIndexObject(object):
50 class BaseIndexObject(object):
50 @property
51 @property
51 def nodemap(self):
52 def nodemap(self):
52 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
53 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
53 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
54 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
54 return self._nodemap
55 return self._nodemap
55
56
56 @util.propertycache
57 @util.propertycache
57 def _nodemap(self):
58 def _nodemap(self):
58 nodemap = revlogutils.NodeMap({nullid: nullrev})
59 nodemap = nodemaputil.NodeMap({nullid: nullrev})
59 for r in range(0, len(self)):
60 for r in range(0, len(self)):
60 n = self[r][7]
61 n = self[r][7]
61 nodemap[n] = r
62 nodemap[n] = r
62 return nodemap
63 return nodemap
63
64
64 def has_node(self, node):
65 def has_node(self, node):
65 """return True if the node exist in the index"""
66 """return True if the node exist in the index"""
66 return node in self._nodemap
67 return node in self._nodemap
67
68
68 def rev(self, node):
69 def rev(self, node):
69 """return a revision for a node
70 """return a revision for a node
70
71
71 If the node is unknown, raise a RevlogError"""
72 If the node is unknown, raise a RevlogError"""
72 return self._nodemap[node]
73 return self._nodemap[node]
73
74
74 def get_rev(self, node):
75 def get_rev(self, node):
75 """return a revision for a node
76 """return a revision for a node
76
77
77 If the node is unknown, return None"""
78 If the node is unknown, return None"""
78 return self._nodemap.get(node)
79 return self._nodemap.get(node)
79
80
80 def _stripnodes(self, start):
81 def _stripnodes(self, start):
81 if '_nodemap' in vars(self):
82 if '_nodemap' in vars(self):
82 for r in range(start, len(self)):
83 for r in range(start, len(self)):
83 n = self[r][7]
84 n = self[r][7]
84 del self._nodemap[n]
85 del self._nodemap[n]
85
86
86 def clearcaches(self):
87 def clearcaches(self):
87 self.__dict__.pop('_nodemap', None)
88 self.__dict__.pop('_nodemap', None)
88
89
89 def __len__(self):
90 def __len__(self):
90 return self._lgt + len(self._extra)
91 return self._lgt + len(self._extra)
91
92
92 def append(self, tup):
93 def append(self, tup):
93 if '_nodemap' in vars(self):
94 if '_nodemap' in vars(self):
94 self._nodemap[tup[7]] = len(self)
95 self._nodemap[tup[7]] = len(self)
95 self._extra.append(tup)
96 self._extra.append(tup)
96
97
97 def _check_index(self, i):
98 def _check_index(self, i):
98 if not isinstance(i, int):
99 if not isinstance(i, int):
99 raise TypeError(b"expecting int indexes")
100 raise TypeError(b"expecting int indexes")
100 if i < 0 or i >= len(self):
101 if i < 0 or i >= len(self):
101 raise IndexError
102 raise IndexError
102
103
103 def __getitem__(self, i):
104 def __getitem__(self, i):
104 if i == -1:
105 if i == -1:
105 return (0, 0, 0, -1, -1, -1, -1, nullid)
106 return (0, 0, 0, -1, -1, -1, -1, nullid)
106 self._check_index(i)
107 self._check_index(i)
107 if i >= self._lgt:
108 if i >= self._lgt:
108 return self._extra[i - self._lgt]
109 return self._extra[i - self._lgt]
109 index = self._calculate_index(i)
110 index = self._calculate_index(i)
110 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
111 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
111 if i == 0:
112 if i == 0:
112 e = list(r)
113 e = list(r)
113 type = gettype(e[0])
114 type = gettype(e[0])
114 e[0] = offset_type(0, type)
115 e[0] = offset_type(0, type)
115 return tuple(e)
116 return tuple(e)
116 return r
117 return r
117
118
118
119
119 class IndexObject(BaseIndexObject):
120 class IndexObject(BaseIndexObject):
120 def __init__(self, data):
121 def __init__(self, data):
121 assert len(data) % indexsize == 0
122 assert len(data) % indexsize == 0
122 self._data = data
123 self._data = data
123 self._lgt = len(data) // indexsize
124 self._lgt = len(data) // indexsize
124 self._extra = []
125 self._extra = []
125
126
126 def _calculate_index(self, i):
127 def _calculate_index(self, i):
127 return i * indexsize
128 return i * indexsize
128
129
129 def __delitem__(self, i):
130 def __delitem__(self, i):
130 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
131 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
131 raise ValueError(b"deleting slices only supports a:-1 with step 1")
132 raise ValueError(b"deleting slices only supports a:-1 with step 1")
132 i = i.start
133 i = i.start
133 self._check_index(i)
134 self._check_index(i)
134 self._stripnodes(i)
135 self._stripnodes(i)
135 if i < self._lgt:
136 if i < self._lgt:
136 self._data = self._data[: i * indexsize]
137 self._data = self._data[: i * indexsize]
137 self._lgt = i
138 self._lgt = i
138 self._extra = []
139 self._extra = []
139 else:
140 else:
140 self._extra = self._extra[: i - self._lgt]
141 self._extra = self._extra[: i - self._lgt]
141
142
142
143
143 class InlinedIndexObject(BaseIndexObject):
144 class InlinedIndexObject(BaseIndexObject):
144 def __init__(self, data, inline=0):
145 def __init__(self, data, inline=0):
145 self._data = data
146 self._data = data
146 self._lgt = self._inline_scan(None)
147 self._lgt = self._inline_scan(None)
147 self._inline_scan(self._lgt)
148 self._inline_scan(self._lgt)
148 self._extra = []
149 self._extra = []
149
150
150 def _inline_scan(self, lgt):
151 def _inline_scan(self, lgt):
151 off = 0
152 off = 0
152 if lgt is not None:
153 if lgt is not None:
153 self._offsets = [0] * lgt
154 self._offsets = [0] * lgt
154 count = 0
155 count = 0
155 while off <= len(self._data) - indexsize:
156 while off <= len(self._data) - indexsize:
156 (s,) = struct.unpack(
157 (s,) = struct.unpack(
157 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
158 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
158 )
159 )
159 if lgt is not None:
160 if lgt is not None:
160 self._offsets[count] = off
161 self._offsets[count] = off
161 count += 1
162 count += 1
162 off += indexsize + s
163 off += indexsize + s
163 if off != len(self._data):
164 if off != len(self._data):
164 raise ValueError(b"corrupted data")
165 raise ValueError(b"corrupted data")
165 return count
166 return count
166
167
167 def __delitem__(self, i):
168 def __delitem__(self, i):
168 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
169 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
169 raise ValueError(b"deleting slices only supports a:-1 with step 1")
170 raise ValueError(b"deleting slices only supports a:-1 with step 1")
170 i = i.start
171 i = i.start
171 self._check_index(i)
172 self._check_index(i)
172 self._stripnodes(i)
173 self._stripnodes(i)
173 if i < self._lgt:
174 if i < self._lgt:
174 self._offsets = self._offsets[:i]
175 self._offsets = self._offsets[:i]
175 self._lgt = i
176 self._lgt = i
176 self._extra = []
177 self._extra = []
177 else:
178 else:
178 self._extra = self._extra[: i - self._lgt]
179 self._extra = self._extra[: i - self._lgt]
179
180
180 def _calculate_index(self, i):
181 def _calculate_index(self, i):
181 return self._offsets[i]
182 return self._offsets[i]
182
183
183
184
184 def parse_index2(data, inline):
185 def parse_index2(data, inline):
185 if not inline:
186 if not inline:
186 return IndexObject(data), None
187 return IndexObject(data), None
187 return InlinedIndexObject(data, inline), (0, data)
188 return InlinedIndexObject(data, inline), (0, data)
188
189
189
190
190 def parse_dirstate(dmap, copymap, st):
191 def parse_dirstate(dmap, copymap, st):
191 parents = [st[:20], st[20:40]]
192 parents = [st[:20], st[20:40]]
192 # dereference fields so they will be local in loop
193 # dereference fields so they will be local in loop
193 format = b">cllll"
194 format = b">cllll"
194 e_size = struct.calcsize(format)
195 e_size = struct.calcsize(format)
195 pos1 = 40
196 pos1 = 40
196 l = len(st)
197 l = len(st)
197
198
198 # the inner loop
199 # the inner loop
199 while pos1 < l:
200 while pos1 < l:
200 pos2 = pos1 + e_size
201 pos2 = pos1 + e_size
201 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
202 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
202 pos1 = pos2 + e[4]
203 pos1 = pos2 + e[4]
203 f = st[pos2:pos1]
204 f = st[pos2:pos1]
204 if b'\0' in f:
205 if b'\0' in f:
205 f, c = f.split(b'\0')
206 f, c = f.split(b'\0')
206 copymap[f] = c
207 copymap[f] = c
207 dmap[f] = e[:4]
208 dmap[f] = e[:4]
208 return parents
209 return parents
209
210
210
211
211 def pack_dirstate(dmap, copymap, pl, now):
212 def pack_dirstate(dmap, copymap, pl, now):
212 now = int(now)
213 now = int(now)
213 cs = stringio()
214 cs = stringio()
214 write = cs.write
215 write = cs.write
215 write(b"".join(pl))
216 write(b"".join(pl))
216 for f, e in pycompat.iteritems(dmap):
217 for f, e in pycompat.iteritems(dmap):
217 if e[0] == b'n' and e[3] == now:
218 if e[0] == b'n' and e[3] == now:
218 # The file was last modified "simultaneously" with the current
219 # The file was last modified "simultaneously" with the current
219 # write to dirstate (i.e. within the same second for file-
220 # write to dirstate (i.e. within the same second for file-
220 # systems with a granularity of 1 sec). This commonly happens
221 # systems with a granularity of 1 sec). This commonly happens
221 # for at least a couple of files on 'update'.
222 # for at least a couple of files on 'update'.
222 # The user could change the file without changing its size
223 # The user could change the file without changing its size
223 # within the same second. Invalidate the file's mtime in
224 # within the same second. Invalidate the file's mtime in
224 # dirstate, forcing future 'status' calls to compare the
225 # dirstate, forcing future 'status' calls to compare the
225 # contents of the file if the size is the same. This prevents
226 # contents of the file if the size is the same. This prevents
226 # mistakenly treating such files as clean.
227 # mistakenly treating such files as clean.
227 e = dirstatetuple(e[0], e[1], e[2], -1)
228 e = dirstatetuple(e[0], e[1], e[2], -1)
228 dmap[f] = e
229 dmap[f] = e
229
230
230 if f in copymap:
231 if f in copymap:
231 f = b"%s\0%s" % (f, copymap[f])
232 f = b"%s\0%s" % (f, copymap[f])
232 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
233 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
233 write(e)
234 write(e)
234 write(f)
235 write(f)
235 return cs.getvalue()
236 return cs.getvalue()
@@ -1,2988 +1,2988 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullhex,
28 nullhex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 short,
31 short,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .pycompat import getattr
38 from .pycompat import getattr
39 from .revlogutils.constants import (
39 from .revlogutils.constants import (
40 FLAG_GENERALDELTA,
40 FLAG_GENERALDELTA,
41 FLAG_INLINE_DATA,
41 FLAG_INLINE_DATA,
42 REVLOGV0,
42 REVLOGV0,
43 REVLOGV1,
43 REVLOGV1,
44 REVLOGV1_FLAGS,
44 REVLOGV1_FLAGS,
45 REVLOGV2,
45 REVLOGV2,
46 REVLOGV2_FLAGS,
46 REVLOGV2_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
48 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_FORMAT,
49 REVLOG_DEFAULT_VERSION,
49 REVLOG_DEFAULT_VERSION,
50 )
50 )
51 from .revlogutils.flagutil import (
51 from .revlogutils.flagutil import (
52 REVIDX_DEFAULT_FLAGS,
52 REVIDX_DEFAULT_FLAGS,
53 REVIDX_ELLIPSIS,
53 REVIDX_ELLIPSIS,
54 REVIDX_EXTSTORED,
54 REVIDX_EXTSTORED,
55 REVIDX_FLAGS_ORDER,
55 REVIDX_FLAGS_ORDER,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 REVIDX_SIDEDATA,
58 REVIDX_SIDEDATA,
59 )
59 )
60 from .thirdparty import attr
60 from .thirdparty import attr
61 from . import (
61 from . import (
62 ancestor,
62 ancestor,
63 dagop,
63 dagop,
64 error,
64 error,
65 mdiff,
65 mdiff,
66 policy,
66 policy,
67 pycompat,
67 pycompat,
68 revlogutils,
69 templatefilters,
68 templatefilters,
70 util,
69 util,
71 )
70 )
72 from .interfaces import (
71 from .interfaces import (
73 repository,
72 repository,
74 util as interfaceutil,
73 util as interfaceutil,
75 )
74 )
76 from .revlogutils import (
75 from .revlogutils import (
77 deltas as deltautil,
76 deltas as deltautil,
78 flagutil,
77 flagutil,
78 nodemap as nodemaputil,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanked usage of all the name to prevent pyflakes constraints
86 # blanked usage of all the name to prevent pyflakes constraints
87 # We need these name available in the module for extensions.
87 # We need these name available in the module for extensions.
88 REVLOGV0
88 REVLOGV0
89 REVLOGV1
89 REVLOGV1
90 REVLOGV2
90 REVLOGV2
91 FLAG_INLINE_DATA
91 FLAG_INLINE_DATA
92 FLAG_GENERALDELTA
92 FLAG_GENERALDELTA
93 REVLOG_DEFAULT_FLAGS
93 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FORMAT
94 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_VERSION
95 REVLOG_DEFAULT_VERSION
96 REVLOGV1_FLAGS
96 REVLOGV1_FLAGS
97 REVLOGV2_FLAGS
97 REVLOGV2_FLAGS
98 REVIDX_ISCENSORED
98 REVIDX_ISCENSORED
99 REVIDX_ELLIPSIS
99 REVIDX_ELLIPSIS
100 REVIDX_SIDEDATA
100 REVIDX_SIDEDATA
101 REVIDX_EXTSTORED
101 REVIDX_EXTSTORED
102 REVIDX_DEFAULT_FLAGS
102 REVIDX_DEFAULT_FLAGS
103 REVIDX_FLAGS_ORDER
103 REVIDX_FLAGS_ORDER
104 REVIDX_RAWTEXT_CHANGING_FLAGS
104 REVIDX_RAWTEXT_CHANGING_FLAGS
105
105
106 parsers = policy.importmod('parsers')
106 parsers = policy.importmod('parsers')
107 rustancestor = policy.importrust('ancestor')
107 rustancestor = policy.importrust('ancestor')
108 rustdagop = policy.importrust('dagop')
108 rustdagop = policy.importrust('dagop')
109 rustrevlog = policy.importrust('revlog')
109 rustrevlog = policy.importrust('revlog')
110
110
111 # Aliased for performance.
111 # Aliased for performance.
112 _zlibdecompress = zlib.decompress
112 _zlibdecompress = zlib.decompress
113
113
114 # max size of revlog with inline data
114 # max size of revlog with inline data
115 _maxinline = 131072
115 _maxinline = 131072
116 _chunksize = 1048576
116 _chunksize = 1048576
117
117
118 # Flag processors for REVIDX_ELLIPSIS.
118 # Flag processors for REVIDX_ELLIPSIS.
119 def ellipsisreadprocessor(rl, text):
119 def ellipsisreadprocessor(rl, text):
120 return text, False, {}
120 return text, False, {}
121
121
122
122
123 def ellipsiswriteprocessor(rl, text, sidedata):
123 def ellipsiswriteprocessor(rl, text, sidedata):
124 return text, False
124 return text, False
125
125
126
126
127 def ellipsisrawprocessor(rl, text):
127 def ellipsisrawprocessor(rl, text):
128 return False
128 return False
129
129
130
130
131 ellipsisprocessor = (
131 ellipsisprocessor = (
132 ellipsisreadprocessor,
132 ellipsisreadprocessor,
133 ellipsiswriteprocessor,
133 ellipsiswriteprocessor,
134 ellipsisrawprocessor,
134 ellipsisrawprocessor,
135 )
135 )
136
136
137
137
138 def getoffset(q):
138 def getoffset(q):
139 return int(q >> 16)
139 return int(q >> 16)
140
140
141
141
142 def gettype(q):
142 def gettype(q):
143 return int(q & 0xFFFF)
143 return int(q & 0xFFFF)
144
144
145
145
146 def offset_type(offset, type):
146 def offset_type(offset, type):
147 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 raise ValueError(b'unknown revlog index flags')
148 raise ValueError(b'unknown revlog index flags')
149 return int(int(offset) << 16 | type)
149 return int(int(offset) << 16 | type)
150
150
151
151
152 def _verify_revision(rl, skipflags, state, node):
152 def _verify_revision(rl, skipflags, state, node):
153 """Verify the integrity of the given revlog ``node`` while providing a hook
153 """Verify the integrity of the given revlog ``node`` while providing a hook
154 point for extensions to influence the operation."""
154 point for extensions to influence the operation."""
155 if skipflags:
155 if skipflags:
156 state[b'skipread'].add(node)
156 state[b'skipread'].add(node)
157 else:
157 else:
158 # Side-effect: read content and verify hash.
158 # Side-effect: read content and verify hash.
159 rl.revision(node)
159 rl.revision(node)
160
160
161
161
162 @attr.s(slots=True, frozen=True)
162 @attr.s(slots=True, frozen=True)
163 class _revisioninfo(object):
163 class _revisioninfo(object):
164 """Information about a revision that allows building its fulltext
164 """Information about a revision that allows building its fulltext
165 node: expected hash of the revision
165 node: expected hash of the revision
166 p1, p2: parent revs of the revision
166 p1, p2: parent revs of the revision
167 btext: built text cache consisting of a one-element list
167 btext: built text cache consisting of a one-element list
168 cachedelta: (baserev, uncompressed_delta) or None
168 cachedelta: (baserev, uncompressed_delta) or None
169 flags: flags associated to the revision storage
169 flags: flags associated to the revision storage
170
170
171 One of btext[0] or cachedelta must be set.
171 One of btext[0] or cachedelta must be set.
172 """
172 """
173
173
174 node = attr.ib()
174 node = attr.ib()
175 p1 = attr.ib()
175 p1 = attr.ib()
176 p2 = attr.ib()
176 p2 = attr.ib()
177 btext = attr.ib()
177 btext = attr.ib()
178 textlen = attr.ib()
178 textlen = attr.ib()
179 cachedelta = attr.ib()
179 cachedelta = attr.ib()
180 flags = attr.ib()
180 flags = attr.ib()
181
181
182
182
183 @interfaceutil.implementer(repository.irevisiondelta)
183 @interfaceutil.implementer(repository.irevisiondelta)
184 @attr.s(slots=True)
184 @attr.s(slots=True)
185 class revlogrevisiondelta(object):
185 class revlogrevisiondelta(object):
186 node = attr.ib()
186 node = attr.ib()
187 p1node = attr.ib()
187 p1node = attr.ib()
188 p2node = attr.ib()
188 p2node = attr.ib()
189 basenode = attr.ib()
189 basenode = attr.ib()
190 flags = attr.ib()
190 flags = attr.ib()
191 baserevisionsize = attr.ib()
191 baserevisionsize = attr.ib()
192 revision = attr.ib()
192 revision = attr.ib()
193 delta = attr.ib()
193 delta = attr.ib()
194 linknode = attr.ib(default=None)
194 linknode = attr.ib(default=None)
195
195
196
196
197 @interfaceutil.implementer(repository.iverifyproblem)
197 @interfaceutil.implementer(repository.iverifyproblem)
198 @attr.s(frozen=True)
198 @attr.s(frozen=True)
199 class revlogproblem(object):
199 class revlogproblem(object):
200 warning = attr.ib(default=None)
200 warning = attr.ib(default=None)
201 error = attr.ib(default=None)
201 error = attr.ib(default=None)
202 node = attr.ib(default=None)
202 node = attr.ib(default=None)
203
203
204
204
205 # index v0:
205 # index v0:
206 # 4 bytes: offset
206 # 4 bytes: offset
207 # 4 bytes: compressed length
207 # 4 bytes: compressed length
208 # 4 bytes: base rev
208 # 4 bytes: base rev
209 # 4 bytes: link rev
209 # 4 bytes: link rev
210 # 20 bytes: parent 1 nodeid
210 # 20 bytes: parent 1 nodeid
211 # 20 bytes: parent 2 nodeid
211 # 20 bytes: parent 2 nodeid
212 # 20 bytes: nodeid
212 # 20 bytes: nodeid
213 indexformatv0 = struct.Struct(b">4l20s20s20s")
213 indexformatv0 = struct.Struct(b">4l20s20s20s")
214 indexformatv0_pack = indexformatv0.pack
214 indexformatv0_pack = indexformatv0.pack
215 indexformatv0_unpack = indexformatv0.unpack
215 indexformatv0_unpack = indexformatv0.unpack
216
216
217
217
218 class revlogoldindex(list):
218 class revlogoldindex(list):
219 @property
219 @property
220 def nodemap(self):
220 def nodemap(self):
221 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
221 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
222 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
222 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
223 return self._nodemap
223 return self._nodemap
224
224
225 @util.propertycache
225 @util.propertycache
226 def _nodemap(self):
226 def _nodemap(self):
227 nodemap = revlogutils.NodeMap({nullid: nullrev})
227 nodemap = nodemaputil.NodeMap({nullid: nullrev})
228 for r in range(0, len(self)):
228 for r in range(0, len(self)):
229 n = self[r][7]
229 n = self[r][7]
230 nodemap[n] = r
230 nodemap[n] = r
231 return nodemap
231 return nodemap
232
232
233 def has_node(self, node):
233 def has_node(self, node):
234 """return True if the node exist in the index"""
234 """return True if the node exist in the index"""
235 return node in self._nodemap
235 return node in self._nodemap
236
236
237 def rev(self, node):
237 def rev(self, node):
238 """return a revision for a node
238 """return a revision for a node
239
239
240 If the node is unknown, raise a RevlogError"""
240 If the node is unknown, raise a RevlogError"""
241 return self._nodemap[node]
241 return self._nodemap[node]
242
242
243 def get_rev(self, node):
243 def get_rev(self, node):
244 """return a revision for a node
244 """return a revision for a node
245
245
246 If the node is unknown, return None"""
246 If the node is unknown, return None"""
247 return self._nodemap.get(node)
247 return self._nodemap.get(node)
248
248
249 def append(self, tup):
249 def append(self, tup):
250 self._nodemap[tup[7]] = len(self)
250 self._nodemap[tup[7]] = len(self)
251 super(revlogoldindex, self).append(tup)
251 super(revlogoldindex, self).append(tup)
252
252
253 def __delitem__(self, i):
253 def __delitem__(self, i):
254 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
254 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
255 raise ValueError(b"deleting slices only supports a:-1 with step 1")
255 raise ValueError(b"deleting slices only supports a:-1 with step 1")
256 for r in pycompat.xrange(i.start, len(self)):
256 for r in pycompat.xrange(i.start, len(self)):
257 del self._nodemap[self[r][7]]
257 del self._nodemap[self[r][7]]
258 super(revlogoldindex, self).__delitem__(i)
258 super(revlogoldindex, self).__delitem__(i)
259
259
260 def clearcaches(self):
260 def clearcaches(self):
261 self.__dict__.pop('_nodemap', None)
261 self.__dict__.pop('_nodemap', None)
262
262
263 def __getitem__(self, i):
263 def __getitem__(self, i):
264 if i == -1:
264 if i == -1:
265 return (0, 0, 0, -1, -1, -1, -1, nullid)
265 return (0, 0, 0, -1, -1, -1, -1, nullid)
266 return list.__getitem__(self, i)
266 return list.__getitem__(self, i)
267
267
268
268
269 class revlogoldio(object):
269 class revlogoldio(object):
270 def __init__(self):
270 def __init__(self):
271 self.size = indexformatv0.size
271 self.size = indexformatv0.size
272
272
273 def parseindex(self, data, inline):
273 def parseindex(self, data, inline):
274 s = self.size
274 s = self.size
275 index = []
275 index = []
276 nodemap = revlogutils.NodeMap({nullid: nullrev})
276 nodemap = nodemaputil.NodeMap({nullid: nullrev})
277 n = off = 0
277 n = off = 0
278 l = len(data)
278 l = len(data)
279 while off + s <= l:
279 while off + s <= l:
280 cur = data[off : off + s]
280 cur = data[off : off + s]
281 off += s
281 off += s
282 e = indexformatv0_unpack(cur)
282 e = indexformatv0_unpack(cur)
283 # transform to revlogv1 format
283 # transform to revlogv1 format
284 e2 = (
284 e2 = (
285 offset_type(e[0], 0),
285 offset_type(e[0], 0),
286 e[1],
286 e[1],
287 -1,
287 -1,
288 e[2],
288 e[2],
289 e[3],
289 e[3],
290 nodemap.get(e[4], nullrev),
290 nodemap.get(e[4], nullrev),
291 nodemap.get(e[5], nullrev),
291 nodemap.get(e[5], nullrev),
292 e[6],
292 e[6],
293 )
293 )
294 index.append(e2)
294 index.append(e2)
295 nodemap[e[6]] = n
295 nodemap[e[6]] = n
296 n += 1
296 n += 1
297
297
298 index = revlogoldindex(index)
298 index = revlogoldindex(index)
299 return index, None
299 return index, None
300
300
301 def packentry(self, entry, node, version, rev):
301 def packentry(self, entry, node, version, rev):
302 if gettype(entry[0]):
302 if gettype(entry[0]):
303 raise error.RevlogError(
303 raise error.RevlogError(
304 _(b'index entry flags need revlog version 1')
304 _(b'index entry flags need revlog version 1')
305 )
305 )
306 e2 = (
306 e2 = (
307 getoffset(entry[0]),
307 getoffset(entry[0]),
308 entry[1],
308 entry[1],
309 entry[3],
309 entry[3],
310 entry[4],
310 entry[4],
311 node(entry[5]),
311 node(entry[5]),
312 node(entry[6]),
312 node(entry[6]),
313 entry[7],
313 entry[7],
314 )
314 )
315 return indexformatv0_pack(*e2)
315 return indexformatv0_pack(*e2)
316
316
317
317
318 # index ng:
318 # index ng:
319 # 6 bytes: offset
319 # 6 bytes: offset
320 # 2 bytes: flags
320 # 2 bytes: flags
321 # 4 bytes: compressed length
321 # 4 bytes: compressed length
322 # 4 bytes: uncompressed length
322 # 4 bytes: uncompressed length
323 # 4 bytes: base rev
323 # 4 bytes: base rev
324 # 4 bytes: link rev
324 # 4 bytes: link rev
325 # 4 bytes: parent 1 rev
325 # 4 bytes: parent 1 rev
326 # 4 bytes: parent 2 rev
326 # 4 bytes: parent 2 rev
327 # 32 bytes: nodeid
327 # 32 bytes: nodeid
328 indexformatng = struct.Struct(b">Qiiiiii20s12x")
328 indexformatng = struct.Struct(b">Qiiiiii20s12x")
329 indexformatng_pack = indexformatng.pack
329 indexformatng_pack = indexformatng.pack
330 versionformat = struct.Struct(b">I")
330 versionformat = struct.Struct(b">I")
331 versionformat_pack = versionformat.pack
331 versionformat_pack = versionformat.pack
332 versionformat_unpack = versionformat.unpack
332 versionformat_unpack = versionformat.unpack
333
333
334 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
334 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
335 # signed integer)
335 # signed integer)
336 _maxentrysize = 0x7FFFFFFF
336 _maxentrysize = 0x7FFFFFFF
337
337
338
338
339 class revlogio(object):
339 class revlogio(object):
340 def __init__(self):
340 def __init__(self):
341 self.size = indexformatng.size
341 self.size = indexformatng.size
342
342
343 def parseindex(self, data, inline):
343 def parseindex(self, data, inline):
344 # call the C implementation to parse the index data
344 # call the C implementation to parse the index data
345 index, cache = parsers.parse_index2(data, inline)
345 index, cache = parsers.parse_index2(data, inline)
346 return index, cache
346 return index, cache
347
347
348 def packentry(self, entry, node, version, rev):
348 def packentry(self, entry, node, version, rev):
349 p = indexformatng_pack(*entry)
349 p = indexformatng_pack(*entry)
350 if rev == 0:
350 if rev == 0:
351 p = versionformat_pack(version) + p[4:]
351 p = versionformat_pack(version) + p[4:]
352 return p
352 return p
353
353
354
354
355 class rustrevlogio(revlogio):
355 class rustrevlogio(revlogio):
356 def parseindex(self, data, inline):
356 def parseindex(self, data, inline):
357 index, cache = super(rustrevlogio, self).parseindex(data, inline)
357 index, cache = super(rustrevlogio, self).parseindex(data, inline)
358 return rustrevlog.MixedIndex(index), cache
358 return rustrevlog.MixedIndex(index), cache
359
359
360
360
361 class revlog(object):
361 class revlog(object):
362 """
362 """
363 the underlying revision storage object
363 the underlying revision storage object
364
364
365 A revlog consists of two parts, an index and the revision data.
365 A revlog consists of two parts, an index and the revision data.
366
366
367 The index is a file with a fixed record size containing
367 The index is a file with a fixed record size containing
368 information on each revision, including its nodeid (hash), the
368 information on each revision, including its nodeid (hash), the
369 nodeids of its parents, the position and offset of its data within
369 nodeids of its parents, the position and offset of its data within
370 the data file, and the revision it's based on. Finally, each entry
370 the data file, and the revision it's based on. Finally, each entry
371 contains a linkrev entry that can serve as a pointer to external
371 contains a linkrev entry that can serve as a pointer to external
372 data.
372 data.
373
373
374 The revision data itself is a linear collection of data chunks.
374 The revision data itself is a linear collection of data chunks.
375 Each chunk represents a revision and is usually represented as a
375 Each chunk represents a revision and is usually represented as a
376 delta against the previous chunk. To bound lookup time, runs of
376 delta against the previous chunk. To bound lookup time, runs of
377 deltas are limited to about 2 times the length of the original
377 deltas are limited to about 2 times the length of the original
378 version data. This makes retrieval of a version proportional to
378 version data. This makes retrieval of a version proportional to
379 its size, or O(1) relative to the number of revisions.
379 its size, or O(1) relative to the number of revisions.
380
380
381 Both pieces of the revlog are written to in an append-only
381 Both pieces of the revlog are written to in an append-only
382 fashion, which means we never need to rewrite a file to insert or
382 fashion, which means we never need to rewrite a file to insert or
383 remove data, and can use some simple techniques to avoid the need
383 remove data, and can use some simple techniques to avoid the need
384 for locking while reading.
384 for locking while reading.
385
385
386 If checkambig, indexfile is opened with checkambig=True at
386 If checkambig, indexfile is opened with checkambig=True at
387 writing, to avoid file stat ambiguity.
387 writing, to avoid file stat ambiguity.
388
388
389 If mmaplargeindex is True, and an mmapindexthreshold is set, the
389 If mmaplargeindex is True, and an mmapindexthreshold is set, the
390 index will be mmapped rather than read if it is larger than the
390 index will be mmapped rather than read if it is larger than the
391 configured threshold.
391 configured threshold.
392
392
393 If censorable is True, the revlog can have censored revisions.
393 If censorable is True, the revlog can have censored revisions.
394
394
395 If `upperboundcomp` is not None, this is the expected maximal gain from
395 If `upperboundcomp` is not None, this is the expected maximal gain from
396 compression for the data content.
396 compression for the data content.
397 """
397 """
398
398
399 _flagserrorclass = error.RevlogError
399 _flagserrorclass = error.RevlogError
400
400
    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        # default data file name derives from the index file name
        # (``foo.i`` -> ``foo.d``)
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        # delta-chain tuning knobs; may be overridden from opener options
        # in _loadindex()
        self._maxchainlen = None
        self._deltabothparents = True
        # the parsed index object; populated by _loadindex()
        self.index = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()
457
457
    def _loadindex(self):
        """Read the on-disk index and initialize version, flags and caches.

        Applies tuning options from ``self.opener.options``, validates the
        revlog version/flag header, selects the appropriate I/O class, and
        parses the index data.  Raises ``error.RevlogError`` on invalid
        configuration, unknown version/flags, or a corrupted index.
        """
        mmapindexthreshold = None
        opts = self.opener.options

        # Version flags to use when this revlog has to be (re)created.
        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        # Per-repository tuning overrides.
        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        # The chunk cache size must be a positive power of two.
        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                # First 4 bytes of the index hold the version/flags header.
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            # Missing index file: treat as a new, empty revlog.
            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            # v0 supports no feature flags at all.
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        # Select the I/O implementation matching the revlog version;
        # REVLOGV0 overrides the rust choice since the old format needs
        # its dedicated reader.
        self._io = revlogio()
        if rustrevlog is not None and self.opener.options.get('rust.index'):
            self._io = rustrevlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
612
612
613 @util.propertycache
613 @util.propertycache
614 def _compressor(self):
614 def _compressor(self):
615 engine = util.compengines[self._compengine]
615 engine = util.compengines[self._compengine]
616 return engine.revlogcompressor(self._compengineopts)
616 return engine.revlogcompressor(self._compengineopts)
617
617
618 def _indexfp(self, mode=b'r'):
618 def _indexfp(self, mode=b'r'):
619 """file object for the revlog's index file"""
619 """file object for the revlog's index file"""
620 args = {'mode': mode}
620 args = {'mode': mode}
621 if mode != b'r':
621 if mode != b'r':
622 args['checkambig'] = self._checkambig
622 args['checkambig'] = self._checkambig
623 if mode == b'w':
623 if mode == b'w':
624 args['atomictemp'] = True
624 args['atomictemp'] = True
625 return self.opener(self.indexfile, **args)
625 return self.opener(self.indexfile, **args)
626
626
627 def _datafp(self, mode=b'r'):
627 def _datafp(self, mode=b'r'):
628 """file object for the revlog's data file"""
628 """file object for the revlog's data file"""
629 return self.opener(self.datafile, mode=mode)
629 return self.opener(self.datafile, mode=mode)
630
630
    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data

        Yields, in order of preference: the caller-provided handle, an
        already-open write handle, or a freshly opened file.
        """
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                # inline revlogs keep their data inside the index file
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
656
656
657 def tiprev(self):
657 def tiprev(self):
658 return len(self.index) - 1
658 return len(self.index) - 1
659
659
660 def tip(self):
660 def tip(self):
661 return self.node(self.tiprev())
661 return self.node(self.tiprev())
662
662
663 def __contains__(self, rev):
663 def __contains__(self, rev):
664 return 0 <= rev < len(self)
664 return 0 <= rev < len(self)
665
665
666 def __len__(self):
666 def __len__(self):
667 return len(self.index)
667 return len(self.index)
668
668
669 def __iter__(self):
669 def __iter__(self):
670 return iter(pycompat.xrange(len(self)))
670 return iter(pycompat.xrange(len(self)))
671
671
672 def revs(self, start=0, stop=None):
672 def revs(self, start=0, stop=None):
673 """iterate over all rev in this revlog (from start to stop)"""
673 """iterate over all rev in this revlog (from start to stop)"""
674 return storageutil.iterrevs(len(self), start=start, stop=stop)
674 return storageutil.iterrevs(len(self), start=start, stop=stop)
675
675
676 @property
676 @property
677 def nodemap(self):
677 def nodemap(self):
678 msg = (
678 msg = (
679 b"revlog.nodemap is deprecated, "
679 b"revlog.nodemap is deprecated, "
680 b"use revlog.index.[has_node|rev|get_rev]"
680 b"use revlog.index.[has_node|rev|get_rev]"
681 )
681 )
682 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
682 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
683 return self.index.nodemap
683 return self.index.nodemap
684
684
685 @property
685 @property
686 def _nodecache(self):
686 def _nodecache(self):
687 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
687 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
688 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
688 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
689 return self.index.nodemap
689 return self.index.nodemap
690
690
691 def hasnode(self, node):
691 def hasnode(self, node):
692 try:
692 try:
693 self.rev(node)
693 self.rev(node)
694 return True
694 return True
695 except KeyError:
695 except KeyError:
696 return False
696 return False
697
697
698 def candelta(self, baserev, rev):
698 def candelta(self, baserev, rev):
699 """whether two revisions (baserev, rev) can be delta-ed or not"""
699 """whether two revisions (baserev, rev) can be delta-ed or not"""
700 # Disable delta if either rev requires a content-changing flag
700 # Disable delta if either rev requires a content-changing flag
701 # processor (ex. LFS). This is because such flag processor can alter
701 # processor (ex. LFS). This is because such flag processor can alter
702 # the rawtext content that the delta will be based on, and two clients
702 # the rawtext content that the delta will be based on, and two clients
703 # could have a same revlog node with different flags (i.e. different
703 # could have a same revlog node with different flags (i.e. different
704 # rawtext contents) and the delta could be incompatible.
704 # rawtext contents) and the delta could be incompatible.
705 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
705 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
706 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
706 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
707 ):
707 ):
708 return False
708 return False
709 return True
709 return True
710
710
711 def clearcaches(self):
711 def clearcaches(self):
712 self._revisioncache = None
712 self._revisioncache = None
713 self._chainbasecache.clear()
713 self._chainbasecache.clear()
714 self._chunkcache = (0, b'')
714 self._chunkcache = (0, b'')
715 self._pcache = {}
715 self._pcache = {}
716 self.index.clearcaches()
716 self.index.clearcaches()
717
717
    def rev(self, node):
        """Map a node id to its revision number.

        Raises WdirUnsupported for the working-directory pseudo nodes and
        LookupError for unknown nodes.  TypeError (e.g. a non-bytes node)
        is deliberately re-raised unchanged.
        """
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))
728
728
729 # Accessors for index entries.
729 # Accessors for index entries.
730
730
731 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
731 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
732 # are flags.
732 # are flags.
733 def start(self, rev):
733 def start(self, rev):
734 return int(self.index[rev][0] >> 16)
734 return int(self.index[rev][0] >> 16)
735
735
736 def flags(self, rev):
736 def flags(self, rev):
737 return self.index[rev][0] & 0xFFFF
737 return self.index[rev][0] & 0xFFFF
738
738
739 def length(self, rev):
739 def length(self, rev):
740 return self.index[rev][1]
740 return self.index[rev][1]
741
741
742 def rawsize(self, rev):
742 def rawsize(self, rev):
743 """return the length of the uncompressed text for a given revision"""
743 """return the length of the uncompressed text for a given revision"""
744 l = self.index[rev][2]
744 l = self.index[rev][2]
745 if l >= 0:
745 if l >= 0:
746 return l
746 return l
747
747
748 t = self.rawdata(rev)
748 t = self.rawdata(rev)
749 return len(t)
749 return len(t)
750
750
751 def size(self, rev):
751 def size(self, rev):
752 """length of non-raw text (processed by a "read" flag processor)"""
752 """length of non-raw text (processed by a "read" flag processor)"""
753 # fast path: if no "read" flag processor could change the content,
753 # fast path: if no "read" flag processor could change the content,
754 # size is rawsize. note: ELLIPSIS is known to not change the content.
754 # size is rawsize. note: ELLIPSIS is known to not change the content.
755 flags = self.flags(rev)
755 flags = self.flags(rev)
756 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
756 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
757 return self.rawsize(rev)
757 return self.rawsize(rev)
758
758
759 return len(self.revision(rev, raw=False))
759 return len(self.revision(rev, raw=False))
760
760
761 def chainbase(self, rev):
761 def chainbase(self, rev):
762 base = self._chainbasecache.get(rev)
762 base = self._chainbasecache.get(rev)
763 if base is not None:
763 if base is not None:
764 return base
764 return base
765
765
766 index = self.index
766 index = self.index
767 iterrev = rev
767 iterrev = rev
768 base = index[iterrev][3]
768 base = index[iterrev][3]
769 while base != iterrev:
769 while base != iterrev:
770 iterrev = base
770 iterrev = base
771 base = index[iterrev][3]
771 base = index[iterrev][3]
772
772
773 self._chainbasecache[rev] = base
773 self._chainbasecache[rev] = base
774 return base
774 return base
775
775
776 def linkrev(self, rev):
776 def linkrev(self, rev):
777 return self.index[rev][4]
777 return self.index[rev][4]
778
778
779 def parentrevs(self, rev):
779 def parentrevs(self, rev):
780 try:
780 try:
781 entry = self.index[rev]
781 entry = self.index[rev]
782 except IndexError:
782 except IndexError:
783 if rev == wdirrev:
783 if rev == wdirrev:
784 raise error.WdirUnsupported
784 raise error.WdirUnsupported
785 raise
785 raise
786
786
787 return entry[5], entry[6]
787 return entry[5], entry[6]
788
788
789 # fast parentrevs(rev) where rev isn't filtered
789 # fast parentrevs(rev) where rev isn't filtered
790 _uncheckedparentrevs = parentrevs
790 _uncheckedparentrevs = parentrevs
791
791
792 def node(self, rev):
792 def node(self, rev):
793 try:
793 try:
794 return self.index[rev][7]
794 return self.index[rev][7]
795 except IndexError:
795 except IndexError:
796 if rev == wdirrev:
796 if rev == wdirrev:
797 raise error.WdirUnsupported
797 raise error.WdirUnsupported
798 raise
798 raise
799
799
800 # Derived from index values.
800 # Derived from index values.
801
801
802 def end(self, rev):
802 def end(self, rev):
803 return self.start(rev) + self.length(rev)
803 return self.start(rev) + self.length(rev)
804
804
805 def parents(self, node):
805 def parents(self, node):
806 i = self.index
806 i = self.index
807 d = i[self.rev(node)]
807 d = i[self.rev(node)]
808 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
808 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
809
809
810 def chainlen(self, rev):
810 def chainlen(self, rev):
811 return self._chaininfo(rev)[0]
811 return self._chaininfo(rev)[0]
812
812
    def _chaininfo(self, rev):
        """Return ``(chain-length, sum-of-compressed-delta-lengths)`` for *rev*.

        Results are memoized in ``self._chaininfocache``; the walk stops
        early as soon as a cached ancestor is encountered.
        """
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        # Walk towards the chain base (a revision that is its own base).
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                # general delta: the base is recorded explicitly
                iterrev = e[3]
            else:
                # classic delta: the base is always the previous revision
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
843
843
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            # pure-Python index: fall through to the implementation below
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        # Walk towards the chain base, collecting revisions on the way.
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            # reached the chain base; it belongs to the chain itself
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
884
884
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        # Pick the fastest available backend: the Rust extension, the
        # Rust-backed C helper, or the pure-Python fallback.
        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, b'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
908
908
909 def descendants(self, revs):
909 def descendants(self, revs):
910 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
910 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
911
911
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            # Set-like wrapper over a lazily-consumed iterable plus explicit
            # additions; lets us treat the (potentially huge) ancestor stream
            # as a set without materializing it up front.
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        # (breadth-first walk from heads, pruned at anything already in
        # 'has', i.e. anything reachable from common)
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
975
975
976 def incrementalmissingrevs(self, common=None):
976 def incrementalmissingrevs(self, common=None):
977 """Return an object that can be used to incrementally compute the
977 """Return an object that can be used to incrementally compute the
978 revision numbers of the ancestors of arbitrary sets that are not
978 revision numbers of the ancestors of arbitrary sets that are not
979 ancestors of common. This is an ancestor.incrementalmissingancestors
979 ancestors of common. This is an ancestor.incrementalmissingancestors
980 object.
980 object.
981
981
982 'common' is a list of revision numbers. If common is not supplied, uses
982 'common' is a list of revision numbers. If common is not supplied, uses
983 nullrev.
983 nullrev.
984 """
984 """
985 if common is None:
985 if common is None:
986 common = [nullrev]
986 common = [nullrev]
987
987
988 if rustancestor is not None:
988 if rustancestor is not None:
989 return rustancestor.MissingAncestors(self.index, common)
989 return rustancestor.MissingAncestors(self.index, common)
990 return ancestor.incrementalmissingancestors(self.parentrevs, common)
990 return ancestor.incrementalmissingancestors(self.parentrevs, common)
991
991
992 def findmissingrevs(self, common=None, heads=None):
992 def findmissingrevs(self, common=None, heads=None):
993 """Return the revision numbers of the ancestors of heads that
993 """Return the revision numbers of the ancestors of heads that
994 are not ancestors of common.
994 are not ancestors of common.
995
995
996 More specifically, return a list of revision numbers corresponding to
996 More specifically, return a list of revision numbers corresponding to
997 nodes N such that every N satisfies the following constraints:
997 nodes N such that every N satisfies the following constraints:
998
998
999 1. N is an ancestor of some node in 'heads'
999 1. N is an ancestor of some node in 'heads'
1000 2. N is not an ancestor of any node in 'common'
1000 2. N is not an ancestor of any node in 'common'
1001
1001
1002 The list is sorted by revision number, meaning it is
1002 The list is sorted by revision number, meaning it is
1003 topologically sorted.
1003 topologically sorted.
1004
1004
1005 'heads' and 'common' are both lists of revision numbers. If heads is
1005 'heads' and 'common' are both lists of revision numbers. If heads is
1006 not supplied, uses all of the revlog's heads. If common is not
1006 not supplied, uses all of the revlog's heads. If common is not
1007 supplied, uses nullid."""
1007 supplied, uses nullid."""
1008 if common is None:
1008 if common is None:
1009 common = [nullrev]
1009 common = [nullrev]
1010 if heads is None:
1010 if heads is None:
1011 heads = self.headrevs()
1011 heads = self.headrevs()
1012
1012
1013 inc = self.incrementalmissingrevs(common=common)
1013 inc = self.incrementalmissingrevs(common=common)
1014 return inc.missingancestors(heads)
1014 return inc.missingancestors(heads)
1015
1015
1016 def findmissing(self, common=None, heads=None):
1016 def findmissing(self, common=None, heads=None):
1017 """Return the ancestors of heads that are not ancestors of common.
1017 """Return the ancestors of heads that are not ancestors of common.
1018
1018
1019 More specifically, return a list of nodes N such that every N
1019 More specifically, return a list of nodes N such that every N
1020 satisfies the following constraints:
1020 satisfies the following constraints:
1021
1021
1022 1. N is an ancestor of some node in 'heads'
1022 1. N is an ancestor of some node in 'heads'
1023 2. N is not an ancestor of any node in 'common'
1023 2. N is not an ancestor of any node in 'common'
1024
1024
1025 The list is sorted by revision number, meaning it is
1025 The list is sorted by revision number, meaning it is
1026 topologically sorted.
1026 topologically sorted.
1027
1027
1028 'heads' and 'common' are both lists of node IDs. If heads is
1028 'heads' and 'common' are both lists of node IDs. If heads is
1029 not supplied, uses all of the revlog's heads. If common is not
1029 not supplied, uses all of the revlog's heads. If common is not
1030 supplied, uses nullid."""
1030 supplied, uses nullid."""
1031 if common is None:
1031 if common is None:
1032 common = [nullid]
1032 common = [nullid]
1033 if heads is None:
1033 if heads is None:
1034 heads = self.heads()
1034 heads = self.heads()
1035
1035
1036 common = [self.rev(n) for n in common]
1036 common = [self.rev(n) for n in common]
1037 heads = [self.rev(n) for n in heads]
1037 heads = [self.rev(n) for n in heads]
1038
1038
1039 inc = self.incrementalmissingrevs(common=common)
1039 inc = self.incrementalmissingrevs(common=common)
1040 return [self.node(r) for r in inc.missingancestors(heads)]
1040 return [self.node(r) for r in inc.missingancestors(heads)]
1041
1041
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            # (phase 1: walk from heads toward the roots, collecting every
            # node on the way into 'ancestors')
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # phase 2: sweep the relevant rev range in topological order,
        # marking descendants of the roots and emitting nodes that are in
        # both the descendant and ancestor sets.
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
1197
1197
1198 def headrevs(self, revs=None):
1198 def headrevs(self, revs=None):
1199 if revs is None:
1199 if revs is None:
1200 try:
1200 try:
1201 return self.index.headrevs()
1201 return self.index.headrevs()
1202 except AttributeError:
1202 except AttributeError:
1203 return self._headrevs()
1203 return self._headrevs()
1204 if rustdagop is not None:
1204 if rustdagop is not None:
1205 return rustdagop.headrevs(self.index, revs)
1205 return rustdagop.headrevs(self.index, revs)
1206 return dagop.headrevs(revs, self._uncheckedparentrevs)
1206 return dagop.headrevs(revs, self._uncheckedparentrevs)
1207
1207
1208 def computephases(self, roots):
1208 def computephases(self, roots):
1209 return self.index.computephasesmapsets(roots)
1209 return self.index.computephasesmapsets(roots)
1210
1210
1211 def _headrevs(self):
1211 def _headrevs(self):
1212 count = len(self)
1212 count = len(self)
1213 if not count:
1213 if not count:
1214 return [nullrev]
1214 return [nullrev]
1215 # we won't iter over filtered rev so nobody is a head at start
1215 # we won't iter over filtered rev so nobody is a head at start
1216 ishead = [0] * (count + 1)
1216 ishead = [0] * (count + 1)
1217 index = self.index
1217 index = self.index
1218 for r in self:
1218 for r in self:
1219 ishead[r] = 1 # I may be an head
1219 ishead[r] = 1 # I may be an head
1220 e = index[r]
1220 e = index[r]
1221 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1221 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1222 return [r for r, val in enumerate(ishead) if val]
1222 return [r for r, val in enumerate(ishead) if val]
1223
1223
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                # empty revlog: the null node is the only head
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        # revisions listed in 'stop' are treated as childless cut points
        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]
1249
1249
1250 def children(self, node):
1250 def children(self, node):
1251 """find the children of a given node"""
1251 """find the children of a given node"""
1252 c = []
1252 c = []
1253 p = self.rev(node)
1253 p = self.rev(node)
1254 for r in self.revs(start=p + 1):
1254 for r in self.revs(start=p + 1):
1255 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1255 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1256 if prevs:
1256 if prevs:
1257 for pr in prevs:
1257 for pr in prevs:
1258 if pr == p:
1258 if pr == p:
1259 c.append(self.node(r))
1259 c.append(self.node(r))
1260 elif p == nullrev:
1260 elif p == nullrev:
1261 c.append(self.node(r))
1261 c.append(self.node(r))
1262 return c
1262 return c
1263
1263
1264 def commonancestorsheads(self, a, b):
1264 def commonancestorsheads(self, a, b):
1265 """calculate all the heads of the common ancestors of nodes a and b"""
1265 """calculate all the heads of the common ancestors of nodes a and b"""
1266 a, b = self.rev(a), self.rev(b)
1266 a, b = self.rev(a), self.rev(b)
1267 ancs = self._commonancestorsheads(a, b)
1267 ancs = self._commonancestorsheads(a, b)
1268 return pycompat.maplist(self.node, ancs)
1268 return pycompat.maplist(self.node, ancs)
1269
1269
1270 def _commonancestorsheads(self, *revs):
1270 def _commonancestorsheads(self, *revs):
1271 """calculate all the heads of the common ancestors of revs"""
1271 """calculate all the heads of the common ancestors of revs"""
1272 try:
1272 try:
1273 ancs = self.index.commonancestorsheads(*revs)
1273 ancs = self.index.commonancestorsheads(*revs)
1274 except (AttributeError, OverflowError): # C implementation failed
1274 except (AttributeError, OverflowError): # C implementation failed
1275 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1275 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1276 return ancs
1276 return ancs
1277
1277
1278 def isancestor(self, a, b):
1278 def isancestor(self, a, b):
1279 """return True if node a is an ancestor of node b
1279 """return True if node a is an ancestor of node b
1280
1280
1281 A revision is considered an ancestor of itself."""
1281 A revision is considered an ancestor of itself."""
1282 a, b = self.rev(a), self.rev(b)
1282 a, b = self.rev(a), self.rev(b)
1283 return self.isancestorrev(a, b)
1283 return self.isancestorrev(a, b)
1284
1284
1285 def isancestorrev(self, a, b):
1285 def isancestorrev(self, a, b):
1286 """return True if revision a is an ancestor of revision b
1286 """return True if revision a is an ancestor of revision b
1287
1287
1288 A revision is considered an ancestor of itself.
1288 A revision is considered an ancestor of itself.
1289
1289
1290 The implementation of this is trivial but the use of
1290 The implementation of this is trivial but the use of
1291 reachableroots is not."""
1291 reachableroots is not."""
1292 if a == nullrev:
1292 if a == nullrev:
1293 return True
1293 return True
1294 elif a == b:
1294 elif a == b:
1295 return True
1295 return True
1296 elif a > b:
1296 elif a > b:
1297 return False
1297 return False
1298 return bool(self.reachableroots(a, [b], [a], includepath=False))
1298 return bool(self.reachableroots(a, [b], [a], includepath=False))
1299
1299
1300 def reachableroots(self, minroot, heads, roots, includepath=False):
1300 def reachableroots(self, minroot, heads, roots, includepath=False):
1301 """return (heads(::(<roots> and <roots>::<heads>)))
1301 """return (heads(::(<roots> and <roots>::<heads>)))
1302
1302
1303 If includepath is True, return (<roots>::<heads>)."""
1303 If includepath is True, return (<roots>::<heads>)."""
1304 try:
1304 try:
1305 return self.index.reachableroots2(
1305 return self.index.reachableroots2(
1306 minroot, heads, roots, includepath
1306 minroot, heads, roots, includepath
1307 )
1307 )
1308 except AttributeError:
1308 except AttributeError:
1309 return dagop._reachablerootspure(
1309 return dagop._reachablerootspure(
1310 self.parentrevs, minroot, roots, heads, includepath
1310 self.parentrevs, minroot, roots, heads, includepath
1311 )
1311 )
1312
1312
1313 def ancestor(self, a, b):
1313 def ancestor(self, a, b):
1314 """calculate the "best" common ancestor of nodes a and b"""
1314 """calculate the "best" common ancestor of nodes a and b"""
1315
1315
1316 a, b = self.rev(a), self.rev(b)
1316 a, b = self.rev(a), self.rev(b)
1317 try:
1317 try:
1318 ancs = self.index.ancestors(a, b)
1318 ancs = self.index.ancestors(a, b)
1319 except (AttributeError, OverflowError):
1319 except (AttributeError, OverflowError):
1320 ancs = ancestor.ancestors(self.parentrevs, a, b)
1320 ancs = ancestor.ancestors(self.parentrevs, a, b)
1321 if ancs:
1321 if ancs:
1322 # choose a consistent winner when there's a tie
1322 # choose a consistent winner when there's a tie
1323 return min(map(self.node, ancs))
1323 return min(map(self.node, ancs))
1324 return nullid
1324 return nullid
1325
1325
    def _match(self, id):
        """Try to resolve ``id`` to a binary node.

        ``id`` may be an integer revision number, a 20-byte binary node,
        or a byte string holding either a decimal revision (negative
        values count from the end) or a full 40-digit hex node.  Falls off
        the end (returning None) when nothing matches."""
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            # reject strings like b'01' or b'+1' that round-trip differently
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
1359
1359
    def _partialmatch(self, id):
        """Resolve a (possibly partial) hex nodeid prefix ``id`` to a node.

        Returns the matching node, or ``None`` when nothing matches.
        Raises ``error.AmbiguousPrefixLookupError`` when the prefix matches
        several nodes and ``error.WdirUnsupported`` when the prefix can only
        denote the virtual working-directory id.
        """
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = wdirhex.startswith(id)
        try:
            # fast path: the C index exposes a radix-tree prefix lookup
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        # slow-path results are memoized per prefix
        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                # scan the whole index for nodes starting with the binary
                # prefix, then narrow by the full (odd-length capable) hex id
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
1412
1412
1413 def lookup(self, id):
1413 def lookup(self, id):
1414 """locate a node based on:
1414 """locate a node based on:
1415 - revision number or str(revision number)
1415 - revision number or str(revision number)
1416 - nodeid or subset of hex nodeid
1416 - nodeid or subset of hex nodeid
1417 """
1417 """
1418 n = self._match(id)
1418 n = self._match(id)
1419 if n is not None:
1419 if n is not None:
1420 return n
1420 return n
1421 n = self._partialmatch(id)
1421 n = self._partialmatch(id)
1422 if n:
1422 if n:
1423 return n
1423 return n
1424
1424
1425 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1425 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1426
1426
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            # a prefix is valid when it resolves to exactly one known node
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            # an all-'f' prefix could also denote the working-directory id
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            # fast path: the C index computes the shortest length directly
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            # for wdir itself, the first prefix not matching a real node wins
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        # pure-python slow path: grow the prefix until unambiguous
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
1475
1475
1476 def cmp(self, node, text):
1476 def cmp(self, node, text):
1477 """compare text with a given file revision
1477 """compare text with a given file revision
1478
1478
1479 returns True if text is different than what is stored.
1479 returns True if text is different than what is stored.
1480 """
1480 """
1481 p1, p2 = self.parents(node)
1481 p1, p2 = self.parents(node)
1482 return storageutil.hashrevisionsha1(text, p1, p2) != node
1482 return storageutil.hashrevisionsha1(text, p1, p2) != node
1483
1483
1484 def _cachesegment(self, offset, data):
1484 def _cachesegment(self, offset, data):
1485 """Add a segment to the revlog cache.
1485 """Add a segment to the revlog cache.
1486
1486
1487 Accepts an absolute offset and the data that is at that location.
1487 Accepts an absolute offset and the data that is at that location.
1488 """
1488 """
1489 o, d = self._chunkcache
1489 o, d = self._chunkcache
1490 # try to add to existing cache
1490 # try to add to existing cache
1491 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1491 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1492 self._chunkcache = o, d + data
1492 self._chunkcache = o, d + data
1493 else:
1493 else:
1494 self._chunkcache = offset, data
1494 self._chunkcache = offset, data
1495
1495
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        # NOTE: the masking below rounds down to a multiple of cachesize,
        # which assumes cachesize is a power of two — presumably enforced
        # where the option is validated (not visible here).
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        # remember the widened window for subsequent nearby reads
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            # the read was widened: slice the requested span back out
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
1555
1555
1556 def _getsegment(self, offset, length, df=None):
1556 def _getsegment(self, offset, length, df=None):
1557 """Obtain a segment of raw data from the revlog.
1557 """Obtain a segment of raw data from the revlog.
1558
1558
1559 Accepts an absolute offset, length of bytes to obtain, and an
1559 Accepts an absolute offset, length of bytes to obtain, and an
1560 optional file handle to the already-opened revlog. If the file
1560 optional file handle to the already-opened revlog. If the file
1561 handle is used, it's original seek position will not be preserved.
1561 handle is used, it's original seek position will not be preserved.
1562
1562
1563 Requests for data may be returned from a cache.
1563 Requests for data may be returned from a cache.
1564
1564
1565 Returns a str or a buffer instance of raw byte data.
1565 Returns a str or a buffer instance of raw byte data.
1566 """
1566 """
1567 o, d = self._chunkcache
1567 o, d = self._chunkcache
1568 l = len(d)
1568 l = len(d)
1569
1569
1570 # is it in the cache?
1570 # is it in the cache?
1571 cachestart = offset - o
1571 cachestart = offset - o
1572 cacheend = cachestart + length
1572 cacheend = cachestart + length
1573 if cachestart >= 0 and cacheend <= l:
1573 if cachestart >= 0 and cacheend <= l:
1574 if cachestart == 0 and cacheend == l:
1574 if cachestart == 0 and cacheend == l:
1575 return d # avoid a copy
1575 return d # avoid a copy
1576 return util.buffer(d, cachestart, cacheend - cachestart)
1576 return util.buffer(d, cachestart, cacheend - cachestart)
1577
1577
1578 return self._readsegment(offset, length, df=df)
1578 return self._readsegment(offset, length, df=df)
1579
1579
1580 def _getsegmentforrevs(self, startrev, endrev, df=None):
1580 def _getsegmentforrevs(self, startrev, endrev, df=None):
1581 """Obtain a segment of raw data corresponding to a range of revisions.
1581 """Obtain a segment of raw data corresponding to a range of revisions.
1582
1582
1583 Accepts the start and end revisions and an optional already-open
1583 Accepts the start and end revisions and an optional already-open
1584 file handle to be used for reading. If the file handle is read, its
1584 file handle to be used for reading. If the file handle is read, its
1585 seek position will not be preserved.
1585 seek position will not be preserved.
1586
1586
1587 Requests for data may be satisfied by a cache.
1587 Requests for data may be satisfied by a cache.
1588
1588
1589 Returns a 2-tuple of (offset, data) for the requested range of
1589 Returns a 2-tuple of (offset, data) for the requested range of
1590 revisions. Offset is the integer offset from the beginning of the
1590 revisions. Offset is the integer offset from the beginning of the
1591 revlog and data is a str or buffer of the raw byte data.
1591 revlog and data is a str or buffer of the raw byte data.
1592
1592
1593 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1593 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1594 to determine where each revision's data begins and ends.
1594 to determine where each revision's data begins and ends.
1595 """
1595 """
1596 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1596 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1597 # (functions are expensive).
1597 # (functions are expensive).
1598 index = self.index
1598 index = self.index
1599 istart = index[startrev]
1599 istart = index[startrev]
1600 start = int(istart[0] >> 16)
1600 start = int(istart[0] >> 16)
1601 if startrev == endrev:
1601 if startrev == endrev:
1602 end = start + istart[1]
1602 end = start + istart[1]
1603 else:
1603 else:
1604 iend = index[endrev]
1604 iend = index[endrev]
1605 end = int(iend[0] >> 16) + iend[1]
1605 end = int(iend[0] >> 16) + iend[1]
1606
1606
1607 if self._inline:
1607 if self._inline:
1608 start += (startrev + 1) * self._io.size
1608 start += (startrev + 1) * self._io.size
1609 end += (endrev + 1) * self._io.size
1609 end += (endrev + 1) * self._io.size
1610 length = end - start
1610 length = end - start
1611
1611
1612 return start, self._getsegment(start, length, df=df)
1612 return start, self._getsegment(start, length, df=df)
1613
1613
1614 def _chunk(self, rev, df=None):
1614 def _chunk(self, rev, df=None):
1615 """Obtain a single decompressed chunk for a revision.
1615 """Obtain a single decompressed chunk for a revision.
1616
1616
1617 Accepts an integer revision and an optional already-open file handle
1617 Accepts an integer revision and an optional already-open file handle
1618 to be used for reading. If used, the seek position of the file will not
1618 to be used for reading. If used, the seek position of the file will not
1619 be preserved.
1619 be preserved.
1620
1620
1621 Returns a str holding uncompressed data for the requested revision.
1621 Returns a str holding uncompressed data for the requested revision.
1622 """
1622 """
1623 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1623 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1624
1624
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        # hoist bound methods/attributes into locals for the hot loop below
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            # one single read covering all requested revisions
            slicedchunks = (revs,)
        else:
            # sparse-read: split the range into dense slices
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                # NOTE(review): this early return discards any chunks already
                # accumulated from previous slices — confirm slices can only
                # overflow when there is a single slice.
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    # account for the interleaved index entries
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                # zero-copy slice of the big read, then decompress
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
1679
1679
1680 def _chunkclear(self):
1680 def _chunkclear(self):
1681 """Clear the raw chunk cache."""
1681 """Clear the raw chunk cache."""
1682 self._chunkcache = (0, b'')
1682 self._chunkcache = (0, b'')
1683
1683
1684 def deltaparent(self, rev):
1684 def deltaparent(self, rev):
1685 """return deltaparent of the given revision"""
1685 """return deltaparent of the given revision"""
1686 base = self.index[rev][3]
1686 base = self.index[rev][3]
1687 if base == rev:
1687 if base == rev:
1688 return nullrev
1688 return nullrev
1689 elif self._generaldelta:
1689 elif self._generaldelta:
1690 return base
1690 return base
1691 else:
1691 else:
1692 return rev - 1
1692 return rev - 1
1693
1693
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            # without sparse-revlog, only full texts (delta against null)
            # are snapshots
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            # full text stored directly
            return True
        if base == nullrev:
            # delta against the null revision is a full snapshot
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            # delta against a parent: a regular delta, not a snapshot
            return False
        # delta against a non-parent: snapshot iff its base is one
        # (intermediate snapshots chain recursively)
        return self.issnapshot(base)
1716
1716
1717 def snapshotdepth(self, rev):
1717 def snapshotdepth(self, rev):
1718 """number of snapshot in the chain before this one"""
1718 """number of snapshot in the chain before this one"""
1719 if not self.issnapshot(rev):
1719 if not self.issnapshot(rev):
1720 raise error.ProgrammingError(b'revision %d not a snapshot')
1720 raise error.ProgrammingError(b'revision %d not a snapshot')
1721 return len(self._deltachain(rev)[0]) - 1
1721 return len(self._deltachain(rev)[0]) - 1
1722
1722
1723 def revdiff(self, rev1, rev2):
1723 def revdiff(self, rev1, rev2):
1724 """return or calculate a delta between two revisions
1724 """return or calculate a delta between two revisions
1725
1725
1726 The delta calculated is in binary form and is intended to be written to
1726 The delta calculated is in binary form and is intended to be written to
1727 revlog data directly. So this function needs raw revision data.
1727 revlog data directly. So this function needs raw revision data.
1728 """
1728 """
1729 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1729 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1730 return bytes(self._chunk(rev2))
1730 return bytes(self._chunk(rev2))
1731
1731
1732 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1732 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1733
1733
1734 def _processflags(self, text, flags, operation, raw=False):
1734 def _processflags(self, text, flags, operation, raw=False):
1735 """deprecated entry point to access flag processors"""
1735 """deprecated entry point to access flag processors"""
1736 msg = b'_processflag(...) use the specialized variant'
1736 msg = b'_processflag(...) use the specialized variant'
1737 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1737 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1738 if raw:
1738 if raw:
1739 return text, flagutil.processflagsraw(self, text, flags)
1739 return text, flagutil.processflagsraw(self, text, flags)
1740 elif operation == b'read':
1740 elif operation == b'read':
1741 return flagutil.processflagsread(self, text, flags)
1741 return flagutil.processflagsread(self, text, flags)
1742 else: # write operation
1742 else: # write operation
1743 return flagutil.processflagswrite(self, text, flags)
1743 return flagutil.processflagswrite(self, text, flags)
1744
1744
1745 def revision(self, nodeorrev, _df=None, raw=False):
1745 def revision(self, nodeorrev, _df=None, raw=False):
1746 """return an uncompressed revision of a given node or revision
1746 """return an uncompressed revision of a given node or revision
1747 number.
1747 number.
1748
1748
1749 _df - an existing file handle to read from. (internal-only)
1749 _df - an existing file handle to read from. (internal-only)
1750 raw - an optional argument specifying if the revision data is to be
1750 raw - an optional argument specifying if the revision data is to be
1751 treated as raw data when applying flag transforms. 'raw' should be set
1751 treated as raw data when applying flag transforms. 'raw' should be set
1752 to True when generating changegroups or in debug commands.
1752 to True when generating changegroups or in debug commands.
1753 """
1753 """
1754 if raw:
1754 if raw:
1755 msg = (
1755 msg = (
1756 b'revlog.revision(..., raw=True) is deprecated, '
1756 b'revlog.revision(..., raw=True) is deprecated, '
1757 b'use revlog.rawdata(...)'
1757 b'use revlog.rawdata(...)'
1758 )
1758 )
1759 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1759 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1760 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1760 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1761
1761
1762 def sidedata(self, nodeorrev, _df=None):
1762 def sidedata(self, nodeorrev, _df=None):
1763 """a map of extra data related to the changeset but not part of the hash
1763 """a map of extra data related to the changeset but not part of the hash
1764
1764
1765 This function currently return a dictionary. However, more advanced
1765 This function currently return a dictionary. However, more advanced
1766 mapping object will likely be used in the future for a more
1766 mapping object will likely be used in the future for a more
1767 efficient/lazy code.
1767 efficient/lazy code.
1768 """
1768 """
1769 return self._revisiondata(nodeorrev, _df)[1]
1769 return self._revisiondata(nodeorrev, _df)[1]
1770
1770
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        """Return a ``(text, sidedata)`` pair for ``nodeorrev``.

        ``nodeorrev`` may be an integer revision or a node. With ``raw``,
        the stored bytes are returned without running the read-side flag
        processors (the raw processors still validate).
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            # raw mode: validate only; the text is the stored bytes
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                # surface sidedata corruption as a revlog error
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            # cache the raw text now that it is known-good
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata
1820
1820
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # cache hit: the cached text was already validated
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            # the delta chain stopped at the cached revision; use its text
            # as the base the remaining deltas apply to
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            # bound read-ahead relative to the final text size
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            # no cached base: the first chunk of the chain is the full text
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)
1863
1863
1864 def rawdata(self, nodeorrev, _df=None):
1864 def rawdata(self, nodeorrev, _df=None):
1865 """return an uncompressed raw data of a given node or revision number.
1865 """return an uncompressed raw data of a given node or revision number.
1866
1866
1867 _df - an existing file handle to read from. (internal-only)
1867 _df - an existing file handle to read from. (internal-only)
1868 """
1868 """
1869 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1869 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1870
1870
1871 def hash(self, text, p1, p2):
1871 def hash(self, text, p1, p2):
1872 """Compute a node hash.
1872 """Compute a node hash.
1873
1873
1874 Available as a function so that subclasses can replace the hash
1874 Available as a function so that subclasses can replace the hash
1875 as needed.
1875 as needed.
1876 """
1876 """
1877 return storageutil.hashrevisionsha1(text, p1, p2)
1877 return storageutil.hashrevisionsha1(text, p1, p2)
1878
1878
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.

        When ``p1``/``p2`` are both None they are looked up from the index.
        Raises RevlogError when the recomputed hash does not match ``node``,
        or CensoredNodeError when the mismatching text is recognizably
        censored content on a censorable revlog.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    # no revision number supplied; report the short node form
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            # Surface censored content as a dedicated error so callers can
            # react to it instead of a generic integrity failure.
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise
1909
1909
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.

        tr - the current transaction, used to register the new data file and
            to replace the index entry
        fp - an optional cached index file handle; it is flushed, closed and
            invalidated because the rewrite below reopens the index
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            # already split out, or still small enough to stay inline
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        # copy every revision's data chunk out of the inline index into the
        # new standalone data file
        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        # rewrite the index entries without the inline flag (and without the
        # interleaved data)
        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replace the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        # drop chunk caches that referred to the old inline layout
        self._chunkclear()
1964
1964
1965 def _nodeduplicatecallback(self, transaction, node):
1965 def _nodeduplicatecallback(self, transaction, node):
1966 """called when trying to add a node already stored.
1966 """called when trying to add a node already stored.
1967 """
1967 """
1968
1968
1969 def addrevision(
1969 def addrevision(
1970 self,
1970 self,
1971 text,
1971 text,
1972 transaction,
1972 transaction,
1973 link,
1973 link,
1974 p1,
1974 p1,
1975 p2,
1975 p2,
1976 cachedelta=None,
1976 cachedelta=None,
1977 node=None,
1977 node=None,
1978 flags=REVIDX_DEFAULT_FLAGS,
1978 flags=REVIDX_DEFAULT_FLAGS,
1979 deltacomputer=None,
1979 deltacomputer=None,
1980 sidedata=None,
1980 sidedata=None,
1981 ):
1981 ):
1982 """add a revision to the log
1982 """add a revision to the log
1983
1983
1984 text - the revision data to add
1984 text - the revision data to add
1985 transaction - the transaction object used for rollback
1985 transaction - the transaction object used for rollback
1986 link - the linkrev data to add
1986 link - the linkrev data to add
1987 p1, p2 - the parent nodeids of the revision
1987 p1, p2 - the parent nodeids of the revision
1988 cachedelta - an optional precomputed delta
1988 cachedelta - an optional precomputed delta
1989 node - nodeid of revision; typically node is not specified, and it is
1989 node - nodeid of revision; typically node is not specified, and it is
1990 computed by default as hash(text, p1, p2), however subclasses might
1990 computed by default as hash(text, p1, p2), however subclasses might
1991 use different hashing method (and override checkhash() in such case)
1991 use different hashing method (and override checkhash() in such case)
1992 flags - the known flags to set on the revision
1992 flags - the known flags to set on the revision
1993 deltacomputer - an optional deltacomputer instance shared between
1993 deltacomputer - an optional deltacomputer instance shared between
1994 multiple calls
1994 multiple calls
1995 """
1995 """
1996 if link == nullrev:
1996 if link == nullrev:
1997 raise error.RevlogError(
1997 raise error.RevlogError(
1998 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1998 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1999 )
1999 )
2000
2000
2001 if sidedata is None:
2001 if sidedata is None:
2002 sidedata = {}
2002 sidedata = {}
2003 flags = flags & ~REVIDX_SIDEDATA
2003 flags = flags & ~REVIDX_SIDEDATA
2004 elif not self.hassidedata:
2004 elif not self.hassidedata:
2005 raise error.ProgrammingError(
2005 raise error.ProgrammingError(
2006 _(b"trying to add sidedata to a revlog who don't support them")
2006 _(b"trying to add sidedata to a revlog who don't support them")
2007 )
2007 )
2008 else:
2008 else:
2009 flags |= REVIDX_SIDEDATA
2009 flags |= REVIDX_SIDEDATA
2010
2010
2011 if flags:
2011 if flags:
2012 node = node or self.hash(text, p1, p2)
2012 node = node or self.hash(text, p1, p2)
2013
2013
2014 rawtext, validatehash = flagutil.processflagswrite(
2014 rawtext, validatehash = flagutil.processflagswrite(
2015 self, text, flags, sidedata=sidedata
2015 self, text, flags, sidedata=sidedata
2016 )
2016 )
2017
2017
2018 # If the flag processor modifies the revision data, ignore any provided
2018 # If the flag processor modifies the revision data, ignore any provided
2019 # cachedelta.
2019 # cachedelta.
2020 if rawtext != text:
2020 if rawtext != text:
2021 cachedelta = None
2021 cachedelta = None
2022
2022
2023 if len(rawtext) > _maxentrysize:
2023 if len(rawtext) > _maxentrysize:
2024 raise error.RevlogError(
2024 raise error.RevlogError(
2025 _(
2025 _(
2026 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2026 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2027 )
2027 )
2028 % (self.indexfile, len(rawtext))
2028 % (self.indexfile, len(rawtext))
2029 )
2029 )
2030
2030
2031 node = node or self.hash(rawtext, p1, p2)
2031 node = node or self.hash(rawtext, p1, p2)
2032 if self.index.has_node(node):
2032 if self.index.has_node(node):
2033 return node
2033 return node
2034
2034
2035 if validatehash:
2035 if validatehash:
2036 self.checkhash(rawtext, node, p1=p1, p2=p2)
2036 self.checkhash(rawtext, node, p1=p1, p2=p2)
2037
2037
2038 return self.addrawrevision(
2038 return self.addrawrevision(
2039 rawtext,
2039 rawtext,
2040 transaction,
2040 transaction,
2041 link,
2041 link,
2042 p1,
2042 p1,
2043 p2,
2043 p2,
2044 node,
2044 node,
2045 flags,
2045 flags,
2046 cachedelta=cachedelta,
2046 cachedelta=cachedelta,
2047 deltacomputer=deltacomputer,
2047 deltacomputer=deltacomputer,
2048 )
2048 )
2049
2049
2050 def addrawrevision(
2050 def addrawrevision(
2051 self,
2051 self,
2052 rawtext,
2052 rawtext,
2053 transaction,
2053 transaction,
2054 link,
2054 link,
2055 p1,
2055 p1,
2056 p2,
2056 p2,
2057 node,
2057 node,
2058 flags,
2058 flags,
2059 cachedelta=None,
2059 cachedelta=None,
2060 deltacomputer=None,
2060 deltacomputer=None,
2061 ):
2061 ):
2062 """add a raw revision with known flags, node and parents
2062 """add a raw revision with known flags, node and parents
2063 useful when reusing a revision not stored in this revlog (ex: received
2063 useful when reusing a revision not stored in this revlog (ex: received
2064 over wire, or read from an external bundle).
2064 over wire, or read from an external bundle).
2065 """
2065 """
2066 dfh = None
2066 dfh = None
2067 if not self._inline:
2067 if not self._inline:
2068 dfh = self._datafp(b"a+")
2068 dfh = self._datafp(b"a+")
2069 ifh = self._indexfp(b"a+")
2069 ifh = self._indexfp(b"a+")
2070 try:
2070 try:
2071 return self._addrevision(
2071 return self._addrevision(
2072 node,
2072 node,
2073 rawtext,
2073 rawtext,
2074 transaction,
2074 transaction,
2075 link,
2075 link,
2076 p1,
2076 p1,
2077 p2,
2077 p2,
2078 flags,
2078 flags,
2079 cachedelta,
2079 cachedelta,
2080 ifh,
2080 ifh,
2081 dfh,
2081 dfh,
2082 deltacomputer=deltacomputer,
2082 deltacomputer=deltacomputer,
2083 )
2083 )
2084 finally:
2084 finally:
2085 if dfh:
2085 if dfh:
2086 dfh.close()
2086 dfh.close()
2087 ifh.close()
2087 ifh.close()
2088
2088
2089 def compress(self, data):
2089 def compress(self, data):
2090 """Generate a possibly-compressed representation of data."""
2090 """Generate a possibly-compressed representation of data."""
2091 if not data:
2091 if not data:
2092 return b'', data
2092 return b'', data
2093
2093
2094 compressed = self._compressor.compress(data)
2094 compressed = self._compressor.compress(data)
2095
2095
2096 if compressed:
2096 if compressed:
2097 # The revlog compressor added the header in the returned data.
2097 # The revlog compressor added the header in the returned data.
2098 return b'', compressed
2098 return b'', compressed
2099
2099
2100 if data[0:1] == b'\0':
2100 if data[0:1] == b'\0':
2101 return b'', data
2101 return b'', data
2102 return b'u', data
2102 return b'u', data
2103
2103
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.

        Raises RevlogError on zlib failure or on an unknown compression
        header.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely by at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            # zlib-compressed chunk
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            # uncompressed chunk stored with a one-byte 'u' marker
            return util.buffer(data, 1)

        # fall back to the registered compression engines, lazily
        # instantiating (and caching) a decompressor per header byte
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)
2161
2161
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        # the handle used to read existing chunks: inline revlogs keep data in
        # the index file
        if self._inline:
            fh = ifh
        else:
            fh = dfh

        # mutable single-element container shared with revinfo; presumably the
        # delta computation fills in the full text lazily — read back below
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        # build and append the new index entry before writing the data
        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            # caller wants the revision cached; materialize the text now
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node
2256
2256
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        """Write a packed index entry and its delta data to disk.

        transaction - transaction used to register the appended ranges
        ifh - index file handle (append mode)
        dfh - data file handle, or None for inline revlogs
        entry - packed index entry bytes
        data - a (header, payload) pair of chunk bytes to write
        link - linkrev of the revision (not used in this method)
        offset - data-file offset where the chunk starts
        """
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            # separate index and data files: register both append ranges
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            # inline revlog: entry and data are interleaved in the index file
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            # the file may now exceed the inline threshold; split if needed
            self._enforceinlinesize(transaction, ifh)
2289
2289
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.

        Returns the list of nodes seen in ``deltas`` (including ones that
        were already stored).
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            # flush both handles so readers see pending writes
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if self.index.has_node(node):
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                # both parents must already be known
                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes
2410
2410
2411 def iscensored(self, rev):
2411 def iscensored(self, rev):
2412 """Check if a file revision is censored."""
2412 """Check if a file revision is censored."""
2413 if not self._censorable:
2413 if not self._censorable:
2414 return False
2414 return False
2415
2415
2416 return self.flags(rev) & REVIDX_ISCENSORED
2416 return self.flags(rev) & REVIDX_ISCENSORED
2417
2417
2418 def _peek_iscensored(self, baserev, delta, flush):
2418 def _peek_iscensored(self, baserev, delta, flush):
2419 """Quickly check if a delta produces a censored revision."""
2419 """Quickly check if a delta produces a censored revision."""
2420 if not self._censorable:
2420 if not self._censorable:
2421 return False
2421 return False
2422
2422
2423 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2423 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2424
2424
2425 def getstrippoint(self, minlink):
2425 def getstrippoint(self, minlink):
2426 """find the minimum rev that must be stripped to strip the linkrev
2426 """find the minimum rev that must be stripped to strip the linkrev
2427
2427
2428 Returns a tuple containing the minimum rev and a set of all revs that
2428 Returns a tuple containing the minimum rev and a set of all revs that
2429 have linkrevs that will be broken by this strip.
2429 have linkrevs that will be broken by this strip.
2430 """
2430 """
2431 return storageutil.resolvestripinfo(
2431 return storageutil.resolvestripinfo(
2432 minlink,
2432 minlink,
2433 len(self) - 1,
2433 len(self) - 1,
2434 self.headrevs(),
2434 self.headrevs(),
2435 self.linkrev,
2435 self.linkrev,
2436 self.parentrevs,
2436 self.parentrevs,
2437 )
2437 )
2438
2438
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            # empty revlog: nothing to truncate
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            # no revision has a linkrev >= minlink: nothing to strip
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            # separate data file: truncate it at the start of the first
            # stripped chunk, and truncate the index after the last
            # surviving fixed-size entry
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            # inline revlog: index entries and revision chunks share one
            # file, so the truncation point is entries plus data so far
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()

        # NOTE(review): the [rev:-1] slice (rather than [rev:]) appears to
        # be part of the index object's deletion contract -- TODO confirm
        del self.index[rev:-1]
2476
2476
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        # expected data-file size derived from the index: the end offset
        # of the last revision (0 for an empty revlog)
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            # a missing data file is acceptable (e.g. inline revlogs);
            # any other I/O error is propagated
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            # di: bytes beyond a whole number of fixed-size index entries
            di = actual - (i * s)
            if self._inline:
                # inline revlogs interleave revision chunks with index
                # entries, so recompute di against entries + chunk bytes
                # and report no separate data-file excess
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
2520
2520
2521 def files(self):
2521 def files(self):
2522 res = [self.indexfile]
2522 res = [self.indexfile]
2523 if not self._inline:
2523 if not self._inline:
2524 res.append(self.datafile)
2524 res.append(self.datafile)
2525 return res
2525 return res
2526
2526
2527 def emitrevisions(
2527 def emitrevisions(
2528 self,
2528 self,
2529 nodes,
2529 nodes,
2530 nodesorder=None,
2530 nodesorder=None,
2531 revisiondata=False,
2531 revisiondata=False,
2532 assumehaveparentrevisions=False,
2532 assumehaveparentrevisions=False,
2533 deltamode=repository.CG_DELTAMODE_STD,
2533 deltamode=repository.CG_DELTAMODE_STD,
2534 ):
2534 ):
2535 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2535 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2536 raise error.ProgrammingError(
2536 raise error.ProgrammingError(
2537 b'unhandled value for nodesorder: %s' % nodesorder
2537 b'unhandled value for nodesorder: %s' % nodesorder
2538 )
2538 )
2539
2539
2540 if nodesorder is None and not self._generaldelta:
2540 if nodesorder is None and not self._generaldelta:
2541 nodesorder = b'storage'
2541 nodesorder = b'storage'
2542
2542
2543 if (
2543 if (
2544 not self._storedeltachains
2544 not self._storedeltachains
2545 and deltamode != repository.CG_DELTAMODE_PREV
2545 and deltamode != repository.CG_DELTAMODE_PREV
2546 ):
2546 ):
2547 deltamode = repository.CG_DELTAMODE_FULL
2547 deltamode = repository.CG_DELTAMODE_FULL
2548
2548
2549 return storageutil.emitrevisions(
2549 return storageutil.emitrevisions(
2550 self,
2550 self,
2551 nodes,
2551 nodes,
2552 nodesorder,
2552 nodesorder,
2553 revlogrevisiondelta,
2553 revlogrevisiondelta,
2554 deltaparentfn=self.deltaparent,
2554 deltaparentfn=self.deltaparent,
2555 candeltafn=self.candelta,
2555 candeltafn=self.candelta,
2556 rawsizefn=self.rawsize,
2556 rawsizefn=self.rawsize,
2557 revdifffn=self.revdiff,
2557 revdifffn=self.revdiff,
2558 flagsfn=self.flags,
2558 flagsfn=self.flags,
2559 deltamode=deltamode,
2559 deltamode=deltamode,
2560 revisiondata=revisiondata,
2560 revisiondata=revisiondata,
2561 assumehaveparentrevisions=assumehaveparentrevisions,
2561 assumehaveparentrevisions=assumehaveparentrevisions,
2562 )
2562 )
2563
2563
    # Delta-reuse policies accepted by clone(); see the clone() method for
    # the detailed semantics of each value.
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    # The set of all valid values for clone()'s ``deltareuse`` argument.
    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2571
2571
    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument control how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revision will be re-added as if their were new content. This is
          slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
          eg: large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the current default is used.

        If not None, the `sidedatacompanion` is callable that accept two
        arguments:

            (srcrevlog, rev)

        and return a triplet that control changes to sidedata content from the
        old revision to the new clone result:

            (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additionnal/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        # filtered revisions would make source and destination disagree on
        # which revisions exist, so refuse both directions
        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            # translate the requested deltareuse policy into the
            # destination's delta-laziness switches before copying
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            # restore the destination's configuration even if the copy
            # failed part-way through
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
2681
2681
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # default: no sidedata changes requested
            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                # full-add path: resolve the revision text, apply the
                # requested sidedata changes, and go through the regular
                # high-level addrevision() machinery
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                # fast path: feed the low-level _addrevision(), possibly
                # reusing the stored delta chunk verbatim
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    # always release the destination file handles
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
2771
2771
    def censorrevision(self, tr, censornode, tombstone=b''):
        """Replace the content of ``censornode`` with a ``tombstone``.

        The revlog is rewritten into temporary files where the censored
        revision stores the tombstone instead of its data; the copies
        then replace the original files (backed up via the transaction).
        Raises for v0 revlogs and when the tombstone is longer than the
        data it replaces.
        """
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        # the tombstone must fit in the censored revision's slot so sizes
        # recorded in the index stay consistent
        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                # write the tombstone in place of the censored data
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                # the tombstone must be stored as a full snapshot, not a
                # delta, for later readers to be able to resolve it
                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                # copy the stored chunk verbatim; resolving an already
                # censored revision would raise
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        # back up the originals, then swap the rewritten files in
        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        # drop all cached state and reload from the rewritten files
        self.clearcaches()
        self._loadindex()
2854
2854
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.

        ``state`` is a mutable dict shared with the verifier; this method
        reads ``expectedversion``, ``skipflags`` and ``erroroncensored``
        and (re)initializes ``skipread`` with nodes whose content could
        not be checked.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                # only skip flag checks the verifier asked for AND that
                # this revision actually carries
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                # censored data is only a problem if the verifier said so
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
2958
2958
2959 def storageinfo(
2959 def storageinfo(
2960 self,
2960 self,
2961 exclusivefiles=False,
2961 exclusivefiles=False,
2962 sharedfiles=False,
2962 sharedfiles=False,
2963 revisionscount=False,
2963 revisionscount=False,
2964 trackedsize=False,
2964 trackedsize=False,
2965 storedsize=False,
2965 storedsize=False,
2966 ):
2966 ):
2967 d = {}
2967 d = {}
2968
2968
2969 if exclusivefiles:
2969 if exclusivefiles:
2970 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2970 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2971 if not self._inline:
2971 if not self._inline:
2972 d[b'exclusivefiles'].append((self.opener, self.datafile))
2972 d[b'exclusivefiles'].append((self.opener, self.datafile))
2973
2973
2974 if sharedfiles:
2974 if sharedfiles:
2975 d[b'sharedfiles'] = []
2975 d[b'sharedfiles'] = []
2976
2976
2977 if revisionscount:
2977 if revisionscount:
2978 d[b'revisionscount'] = len(self)
2978 d[b'revisionscount'] = len(self)
2979
2979
2980 if trackedsize:
2980 if trackedsize:
2981 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2981 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2982
2982
2983 if storedsize:
2983 if storedsize:
2984 d[b'storedsize'] = sum(
2984 d[b'storedsize'] = sum(
2985 self.opener.stat(path).st_size for path in self.files()
2985 self.opener.stat(path).st_size for path in self.files()
2986 )
2986 )
2987
2987
2988 return d
2988 return d
@@ -1,14 +1,8 b''
1 # mercurial.revlogutils -- basic utilities for revlog
1 # mercurial.revlogutils -- basic utilities for revlog
2 #
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9 from .. import error
10
11
12 class NodeMap(dict):
13 def __missing__(self, x):
14 raise error.RevlogError(b'unknown node: %s' % x)
General Comments 0
You need to be logged in to leave comments. Login now