revlog: move nodemap update within the index code
author: marmoute
changeset: r43931:dcf9826c (branch: default)
parsers.py
@@ -1,203 +1,205 @@
# parsers.py - Python implementation of parsers.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct
import zlib

from ..node import nullid, nullrev
from .. import (
    pycompat,
    revlogutils,
    util,
)

stringio = pycompat.bytesio


_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# Some code below makes tuples directly because it's more convenient. However,
# code outside this module should always use dirstatetuple.
def dirstatetuple(*x):
    # x is a tuple
    return x


indexformatng = b">Qiiiiii20s12x"
indexfirst = struct.calcsize(b'Q')
sizeint = struct.calcsize(b'i')
indexsize = struct.calcsize(indexformatng)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    return int(int(offset) << 16 | type)


class BaseIndexObject(object):
    @util.propertycache
    def nodemap(self):
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def clearcaches(self):
        self.__dict__.pop('nodemap', None)

    def __len__(self):
        return self._lgt + len(self._extra)

    def append(self, tup):
+       if 'nodemap' in vars(self):
+           self.nodemap[tup[7]] = len(self)
        self._extra.append(tup)

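    # note: the lines added to append() above update a *materialized* nodemap
    # in place rather than invalidating it; util.propertycache stores the
    # computed mapping in the instance __dict__, so "'nodemap' in vars(self)"
    # is true only once the property has actually been computed.
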
    def _check_index(self, i):
        if not isinstance(i, int):
            raise TypeError(b"expecting int indexes")
        if i < 0 or i >= len(self):
            raise IndexError

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        self._check_index(i)
        if i >= self._lgt:
            return self._extra[i - self._lgt]
        index = self._calculate_index(i)
        r = struct.unpack(indexformatng, self._data[index : index + indexsize])
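        # on disk the first 4 bytes of entry 0 hold the revlog version header
        # rather than a real offset, so rev 0 is normalized below to report
        # offset 0 while keeping its flag bits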
        if i == 0:
            e = list(r)
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            return tuple(e)
        return r


class IndexObject(BaseIndexObject):
    def __init__(self, data):
        assert len(data) % indexsize == 0
        self._data = data
        self._lgt = len(data) // indexsize
        self._extra = []

    def _calculate_index(self, i):
        return i * indexsize

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        i = i.start
        self._check_index(i)
        if i < self._lgt:
            self._data = self._data[: i * indexsize]
            self._lgt = i
            self._extra = []
        else:
            self._extra = self._extra[: i - self._lgt]


class InlinedIndexObject(BaseIndexObject):
    def __init__(self, data, inline=0):
        self._data = data
        self._lgt = self._inline_scan(None)
        self._inline_scan(self._lgt)
        self._extra = []

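    # in an inline revlog the index entries and the revision data share one
    # file: each 64-byte entry is immediately followed by the revision's
    # compressed chunk, whose length sits in the entry's second field (the
    # b'>i' read below), so entry offsets have to be discovered by scanning
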
    def _inline_scan(self, lgt):
        off = 0
        if lgt is not None:
            self._offsets = [0] * lgt
        count = 0
        while off <= len(self._data) - indexsize:
            (s,) = struct.unpack(
                b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
            )
            if lgt is not None:
                self._offsets[count] = off
            count += 1
            off += indexsize + s
        if off != len(self._data):
            raise ValueError(b"corrupted data")
        return count

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        i = i.start
        self._check_index(i)
        if i < self._lgt:
            self._offsets = self._offsets[:i]
            self._lgt = i
            self._extra = []
        else:
            self._extra = self._extra[: i - self._lgt]

    def _calculate_index(self, i):
        return self._offsets[i]


def parse_index2(data, inline):
    if not inline:
        return IndexObject(data), None
    return InlinedIndexObject(data, inline), (0, data)


def parse_dirstate(dmap, copymap, st):
    parents = [st[:20], st[20:40]]
    # dereference fields so they will be local in loop
    format = b">cllll"
    e_size = struct.calcsize(format)
    pos1 = 40
    l = len(st)

    # the inner loop
    while pos1 < l:
        pos2 = pos1 + e_size
        e = _unpack(b">cllll", st[pos1:pos2])  # a literal here is faster
        pos1 = pos2 + e[4]
        f = st[pos2:pos1]
        if b'\0' in f:
            f, c = f.split(b'\0')
            copymap[f] = c
        dmap[f] = e[:4]
    return parents
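
# dirstate layout, for reference: 40 bytes of parent nodeids, then one record
# per file: ">cllll" = state char, mode, size, mtime, and the length of the
# filename that follows; a copy source, when present, is appended to the
# filename after a NUL byte.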


def pack_dirstate(dmap, copymap, pl, now):
    now = int(now)
    cs = stringio()
    write = cs.write
    write(b"".join(pl))
    for f, e in pycompat.iteritems(dmap):
        if e[0] == b'n' and e[3] == now:
            # The file was last modified "simultaneously" with the current
            # write to dirstate (i.e. within the same second for file-
            # systems with a granularity of 1 sec). This commonly happens
            # for at least a couple of files on 'update'.
            # The user could change the file without changing its size
            # within the same second. Invalidate the file's mtime in
            # dirstate, forcing future 'status' calls to compare the
            # contents of the file if the size is the same. This prevents
            # mistakenly treating such files as clean.
            e = dirstatetuple(e[0], e[1], e[2], -1)
            dmap[f] = e

        if f in copymap:
            f = b"%s\0%s" % (f, copymap[f])
        e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
        write(e)
        write(f)
    return cs.getvalue()
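
The parsers.py half of this change in a nutshell: the pure-Python index now
updates its node-to-rev mapping on append, but only when that mapping has
already been built. A minimal self-contained sketch of the pattern follows
(not Mercurial code: functools.cached_property, Python 3.8+, stands in for
util.propertycache, and a toy one-field entry replaces the real index tuple):

    import functools

    class ToyIndex(list):
        @functools.cached_property
        def nodemap(self):
            # built lazily, on first access, from the existing entries
            return {node: rev for rev, (node,) in enumerate(self)}

        def append(self, entry):
            # cached_property stores the mapping in the instance __dict__,
            # so this test is true only once the cache is materialized
            if 'nodemap' in vars(self):
                self.nodemap[entry[0]] = len(self)
            super(ToyIndex, self).append(entry)

    idx = ToyIndex()
    idx.append(('n1',))              # no cache yet: nothing to update
    assert idx.nodemap == {'n1': 0}  # first access builds the mapping
    idx.append(('n2',))              # cache exists: updated in place
    assert idx.nodemap['n2'] == 1    # kept in sync without a rebuild
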
revlog.py
@@ -1,2952 +1,2955 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# bare usage of all the names to prevent pyflakes "unused import" complaints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
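
# note on the packing above: the first field of an index entry holds both the
# data offset and the flags as (offset << 16) | flags; getoffset() shifts the
# offset back out and gettype() masks off the low 16 bits.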


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
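
# struct.calcsize(b">4l20s20s20s") == 76: a v0 index entry is four 4-byte
# big-endian integers followed by three 20-byte nodeids.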


class revlogoldindex(list):
    @util.propertycache
    def nodemap(self):
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

+   def append(self, tup):
+       self.nodemap[tup[7]] = len(self)
+       super(revlogoldindex, self).append(tup)
+
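    # note: unlike BaseIndexObject.append() in parsers.py, the append() added
    # above reads self.nodemap unconditionally, so a not-yet-built nodemap is
    # materialized by the propertycache (computed over the existing entries)
    # on the first append.
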
    def clearcaches(self):
        self.__dict__.pop('nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
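
# struct.calcsize(b">Qiiiiii20s12x") == 64: 8 bytes of offset+flags, six
# 4-byte integers, a 20-byte nodeid, and 12 bytes of padding. For rev 0 the
# first 4 bytes are reused to store the version header (see packentry below),
# which is why readers normalize entry 0's offset back to zero.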
289
293
290 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
294 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
291 # signed integer)
295 # signed integer)
292 _maxentrysize = 0x7FFFFFFF
296 _maxentrysize = 0x7FFFFFFF
293
297
294
298
295 class revlogio(object):
299 class revlogio(object):
296 def __init__(self):
300 def __init__(self):
297 self.size = indexformatng.size
301 self.size = indexformatng.size
298
302
299 def parseindex(self, data, inline):
303 def parseindex(self, data, inline):
300 # call the C implementation to parse the index data
304 # call the C implementation to parse the index data
301 index, cache = parsers.parse_index2(data, inline)
305 index, cache = parsers.parse_index2(data, inline)
302 return index, cache
306 return index, cache
303
307
304 def packentry(self, entry, node, version, rev):
308 def packentry(self, entry, node, version, rev):
305 p = indexformatng_pack(*entry)
309 p = indexformatng_pack(*entry)
306 if rev == 0:
310 if rev == 0:
307 p = versionformat_pack(version) + p[4:]
311 p = versionformat_pack(version) + p[4:]
308 return p
312 return p
309
313
310
314
311 class revlog(object):
315 class revlog(object):
312 """
316 """
313 the underlying revision storage object
317 the underlying revision storage object
314
318
315 A revlog consists of two parts, an index and the revision data.
319 A revlog consists of two parts, an index and the revision data.
316
320
317 The index is a file with a fixed record size containing
321 The index is a file with a fixed record size containing
318 information on each revision, including its nodeid (hash), the
322 information on each revision, including its nodeid (hash), the
319 nodeids of its parents, the position and offset of its data within
323 nodeids of its parents, the position and offset of its data within
320 the data file, and the revision it's based on. Finally, each entry
324 the data file, and the revision it's based on. Finally, each entry
321 contains a linkrev entry that can serve as a pointer to external
325 contains a linkrev entry that can serve as a pointer to external
322 data.
326 data.
323
327
324 The revision data itself is a linear collection of data chunks.
328 The revision data itself is a linear collection of data chunks.
325 Each chunk represents a revision and is usually represented as a
329 Each chunk represents a revision and is usually represented as a
326 delta against the previous chunk. To bound lookup time, runs of
330 delta against the previous chunk. To bound lookup time, runs of
327 deltas are limited to about 2 times the length of the original
331 deltas are limited to about 2 times the length of the original
328 version data. This makes retrieval of a version proportional to
332 version data. This makes retrieval of a version proportional to
329 its size, or O(1) relative to the number of revisions.
333 its size, or O(1) relative to the number of revisions.
330
334
331 Both pieces of the revlog are written to in an append-only
335 Both pieces of the revlog are written to in an append-only
332 fashion, which means we never need to rewrite a file to insert or
336 fashion, which means we never need to rewrite a file to insert or
333 remove data, and can use some simple techniques to avoid the need
337 remove data, and can use some simple techniques to avoid the need
334 for locking while reading.
338 for locking while reading.
335
339
336 If checkambig, indexfile is opened with checkambig=True at
340 If checkambig, indexfile is opened with checkambig=True at
337 writing, to avoid file stat ambiguity.
341 writing, to avoid file stat ambiguity.
338
342
339 If mmaplargeindex is True, and an mmapindexthreshold is set, the
343 If mmaplargeindex is True, and an mmapindexthreshold is set, the
340 index will be mmapped rather than read if it is larger than the
344 index will be mmapped rather than read if it is larger than the
341 configured threshold.
345 configured threshold.
342
346
343 If censorable is True, the revlog can have censored revisions.
347 If censorable is True, the revlog can have censored revisions.
344
348
345 If `upperboundcomp` is not None, this is the expected maximal gain from
349 If `upperboundcomp` is not None, this is the expected maximal gain from
346 compression for the data content.
350 compression for the data content.
347 """
351 """
348
352
349 _flagserrorclass = error.RevlogError
353 _flagserrorclass = error.RevlogError
350
354
351 def __init__(
355 def __init__(
352 self,
356 self,
353 opener,
357 opener,
354 indexfile,
358 indexfile,
355 datafile=None,
359 datafile=None,
356 checkambig=False,
360 checkambig=False,
357 mmaplargeindex=False,
361 mmaplargeindex=False,
358 censorable=False,
362 censorable=False,
359 upperboundcomp=None,
363 upperboundcomp=None,
360 ):
364 ):
361 """
365 """
362 create a revlog object
366 create a revlog object
363
367
364 opener is a function that abstracts the file opening operation
368 opener is a function that abstracts the file opening operation
365 and can be used to implement COW semantics or the like.
369 and can be used to implement COW semantics or the like.
366
370
367 """
371 """
368 self.upperboundcomp = upperboundcomp
372 self.upperboundcomp = upperboundcomp
369 self.indexfile = indexfile
373 self.indexfile = indexfile
370 self.datafile = datafile or (indexfile[:-2] + b".d")
374 self.datafile = datafile or (indexfile[:-2] + b".d")
371 self.opener = opener
375 self.opener = opener
372 # When True, indexfile is opened with checkambig=True at writing, to
376 # When True, indexfile is opened with checkambig=True at writing, to
373 # avoid file stat ambiguity.
377 # avoid file stat ambiguity.
374 self._checkambig = checkambig
378 self._checkambig = checkambig
375 self._mmaplargeindex = mmaplargeindex
379 self._mmaplargeindex = mmaplargeindex
376 self._censorable = censorable
380 self._censorable = censorable
377 # 3-tuple of (node, rev, text) for a raw revision.
381 # 3-tuple of (node, rev, text) for a raw revision.
378 self._revisioncache = None
382 self._revisioncache = None
379 # Maps rev to chain base rev.
383 # Maps rev to chain base rev.
380 self._chainbasecache = util.lrucachedict(100)
384 self._chainbasecache = util.lrucachedict(100)
381 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
385 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
382 self._chunkcache = (0, b'')
386 self._chunkcache = (0, b'')
383 # How much data to read and cache into the raw revlog data cache.
387 # How much data to read and cache into the raw revlog data cache.
384 self._chunkcachesize = 65536
388 self._chunkcachesize = 65536
385 self._maxchainlen = None
389 self._maxchainlen = None
386 self._deltabothparents = True
390 self._deltabothparents = True
387 self.index = None
391 self.index = None
388 # Mapping of partial identifiers to full nodes.
392 # Mapping of partial identifiers to full nodes.
389 self._pcache = {}
393 self._pcache = {}
390 # Mapping of revision integer to full node.
394 # Mapping of revision integer to full node.
391 self._nodepos = None
395 self._nodepos = None
392 self._compengine = b'zlib'
396 self._compengine = b'zlib'
393 self._compengineopts = {}
397 self._compengineopts = {}
394 self._maxdeltachainspan = -1
398 self._maxdeltachainspan = -1
395 self._withsparseread = False
399 self._withsparseread = False
396 self._sparserevlog = False
400 self._sparserevlog = False
397 self._srdensitythreshold = 0.50
401 self._srdensitythreshold = 0.50
398 self._srmingapsize = 262144
402 self._srmingapsize = 262144
399
403
400 # Make copy of flag processors so each revlog instance can support
404 # Make copy of flag processors so each revlog instance can support
401 # custom flags.
405 # custom flags.
402 self._flagprocessors = dict(flagutil.flagprocessors)
406 self._flagprocessors = dict(flagutil.flagprocessors)
403
407
404 # 2-tuple of file handles being used for active writing.
408 # 2-tuple of file handles being used for active writing.
405 self._writinghandles = None
409 self._writinghandles = None
406
410
407 self._loadindex()
411 self._loadindex()
408
412
409 def _loadindex(self):
413 def _loadindex(self):
410 mmapindexthreshold = None
414 mmapindexthreshold = None
411 opts = self.opener.options
415 opts = self.opener.options
412
416
413 if b'revlogv2' in opts:
417 if b'revlogv2' in opts:
414 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
418 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
415 elif b'revlogv1' in opts:
419 elif b'revlogv1' in opts:
416 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
420 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
417 if b'generaldelta' in opts:
421 if b'generaldelta' in opts:
418 newversionflags |= FLAG_GENERALDELTA
422 newversionflags |= FLAG_GENERALDELTA
419 elif b'revlogv0' in self.opener.options:
423 elif b'revlogv0' in self.opener.options:
420 newversionflags = REVLOGV0
424 newversionflags = REVLOGV0
421 else:
425 else:
422 newversionflags = REVLOG_DEFAULT_VERSION
426 newversionflags = REVLOG_DEFAULT_VERSION
423
427
424 if b'chunkcachesize' in opts:
428 if b'chunkcachesize' in opts:
425 self._chunkcachesize = opts[b'chunkcachesize']
429 self._chunkcachesize = opts[b'chunkcachesize']
426 if b'maxchainlen' in opts:
430 if b'maxchainlen' in opts:
427 self._maxchainlen = opts[b'maxchainlen']
431 self._maxchainlen = opts[b'maxchainlen']
428 if b'deltabothparents' in opts:
432 if b'deltabothparents' in opts:
429 self._deltabothparents = opts[b'deltabothparents']
433 self._deltabothparents = opts[b'deltabothparents']
430 self._lazydelta = bool(opts.get(b'lazydelta', True))
434 self._lazydelta = bool(opts.get(b'lazydelta', True))
431 self._lazydeltabase = False
435 self._lazydeltabase = False
432 if self._lazydelta:
436 if self._lazydelta:
433 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
437 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
434 if b'compengine' in opts:
438 if b'compengine' in opts:
435 self._compengine = opts[b'compengine']
439 self._compengine = opts[b'compengine']
436 if b'zlib.level' in opts:
440 if b'zlib.level' in opts:
437 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
441 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
438 if b'zstd.level' in opts:
442 if b'zstd.level' in opts:
439 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
443 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
440 if b'maxdeltachainspan' in opts:
444 if b'maxdeltachainspan' in opts:
441 self._maxdeltachainspan = opts[b'maxdeltachainspan']
445 self._maxdeltachainspan = opts[b'maxdeltachainspan']
442 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
446 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
443 mmapindexthreshold = opts[b'mmapindexthreshold']
447 mmapindexthreshold = opts[b'mmapindexthreshold']
444 self.hassidedata = bool(opts.get(b'side-data', False))
448 self.hassidedata = bool(opts.get(b'side-data', False))
445 if self.hassidedata:
449 if self.hassidedata:
446 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
450 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
447 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
451 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
448 withsparseread = bool(opts.get(b'with-sparse-read', False))
452 withsparseread = bool(opts.get(b'with-sparse-read', False))
449 # sparse-revlog forces sparse-read
453 # sparse-revlog forces sparse-read
450 self._withsparseread = self._sparserevlog or withsparseread
454 self._withsparseread = self._sparserevlog or withsparseread
451 if b'sparse-read-density-threshold' in opts:
455 if b'sparse-read-density-threshold' in opts:
452 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
456 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
453 if b'sparse-read-min-gap-size' in opts:
457 if b'sparse-read-min-gap-size' in opts:
454 self._srmingapsize = opts[b'sparse-read-min-gap-size']
458 self._srmingapsize = opts[b'sparse-read-min-gap-size']
455 if opts.get(b'enableellipsis'):
459 if opts.get(b'enableellipsis'):
456 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
460 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
457
461
458 # revlog v0 doesn't have flag processors
462 # revlog v0 doesn't have flag processors
459 for flag, processor in pycompat.iteritems(
463 for flag, processor in pycompat.iteritems(
460 opts.get(b'flagprocessors', {})
464 opts.get(b'flagprocessors', {})
461 ):
465 ):
462 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
466 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
463
467
464 if self._chunkcachesize <= 0:
468 if self._chunkcachesize <= 0:
465 raise error.RevlogError(
469 raise error.RevlogError(
466 _(b'revlog chunk cache size %r is not greater than 0')
470 _(b'revlog chunk cache size %r is not greater than 0')
467 % self._chunkcachesize
471 % self._chunkcachesize
468 )
472 )
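        # x & (x - 1) == 0 exactly when x is a power of two
        # (e.g. 65536 & 65535 == 0), hence the check below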
469 elif self._chunkcachesize & (self._chunkcachesize - 1):
473 elif self._chunkcachesize & (self._chunkcachesize - 1):
470 raise error.RevlogError(
474 raise error.RevlogError(
471 _(b'revlog chunk cache size %r is not a power of 2')
475 _(b'revlog chunk cache size %r is not a power of 2')
472 % self._chunkcachesize
476 % self._chunkcachesize
473 )
477 )
474
478
475 indexdata = b''
479 indexdata = b''
476 self._initempty = True
480 self._initempty = True
477 try:
481 try:
478 with self._indexfp() as f:
482 with self._indexfp() as f:
479 if (
483 if (
480 mmapindexthreshold is not None
484 mmapindexthreshold is not None
481 and self.opener.fstat(f).st_size >= mmapindexthreshold
485 and self.opener.fstat(f).st_size >= mmapindexthreshold
482 ):
486 ):
483 # TODO: should .close() to release resources without
487 # TODO: should .close() to release resources without
484 # relying on Python GC
488 # relying on Python GC
485 indexdata = util.buffer(util.mmapread(f))
489 indexdata = util.buffer(util.mmapread(f))
486 else:
490 else:
487 indexdata = f.read()
491 indexdata = f.read()
488 if len(indexdata) > 0:
492 if len(indexdata) > 0:
489 versionflags = versionformat_unpack(indexdata[:4])[0]
493 versionflags = versionformat_unpack(indexdata[:4])[0]
490 self._initempty = False
494 self._initempty = False
491 else:
495 else:
492 versionflags = newversionflags
496 versionflags = newversionflags
493 except IOError as inst:
497 except IOError as inst:
494 if inst.errno != errno.ENOENT:
498 if inst.errno != errno.ENOENT:
495 raise
499 raise
496
500
497 versionflags = newversionflags
501 versionflags = newversionflags
498
502
499 self.version = versionflags
503 self.version = versionflags
500
504
501 flags = versionflags & ~0xFFFF
505 flags = versionflags & ~0xFFFF
502 fmt = versionflags & 0xFFFF
506 fmt = versionflags & 0xFFFF
503
507
504 if fmt == REVLOGV0:
508 if fmt == REVLOGV0:
505 if flags:
509 if flags:
506 raise error.RevlogError(
510 raise error.RevlogError(
507 _(b'unknown flags (%#04x) in version %d revlog %s')
511 _(b'unknown flags (%#04x) in version %d revlog %s')
508 % (flags >> 16, fmt, self.indexfile)
512 % (flags >> 16, fmt, self.indexfile)
509 )
513 )
510
514
511 self._inline = False
515 self._inline = False
512 self._generaldelta = False
516 self._generaldelta = False
513
517
514 elif fmt == REVLOGV1:
518 elif fmt == REVLOGV1:
515 if flags & ~REVLOGV1_FLAGS:
519 if flags & ~REVLOGV1_FLAGS:
516 raise error.RevlogError(
520 raise error.RevlogError(
517 _(b'unknown flags (%#04x) in version %d revlog %s')
521 _(b'unknown flags (%#04x) in version %d revlog %s')
518 % (flags >> 16, fmt, self.indexfile)
522 % (flags >> 16, fmt, self.indexfile)
519 )
523 )
520
524
521 self._inline = versionflags & FLAG_INLINE_DATA
525 self._inline = versionflags & FLAG_INLINE_DATA
522 self._generaldelta = versionflags & FLAG_GENERALDELTA
526 self._generaldelta = versionflags & FLAG_GENERALDELTA
523
527
524 elif fmt == REVLOGV2:
528 elif fmt == REVLOGV2:
525 if flags & ~REVLOGV2_FLAGS:
529 if flags & ~REVLOGV2_FLAGS:
526 raise error.RevlogError(
530 raise error.RevlogError(
527 _(b'unknown flags (%#04x) in version %d revlog %s')
531 _(b'unknown flags (%#04x) in version %d revlog %s')
528 % (flags >> 16, fmt, self.indexfile)
532 % (flags >> 16, fmt, self.indexfile)
529 )
533 )
530
534
531 self._inline = versionflags & FLAG_INLINE_DATA
535 self._inline = versionflags & FLAG_INLINE_DATA
532 # generaldelta implied by version 2 revlogs.
536 # generaldelta implied by version 2 revlogs.
533 self._generaldelta = True
537 self._generaldelta = True
534
538
535 else:
539 else:
536 raise error.RevlogError(
540 raise error.RevlogError(
537 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
541 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
538 )
542 )
539 # sparse-revlog can't be on without general-delta (issue6056)
543 # sparse-revlog can't be on without general-delta (issue6056)
540 if not self._generaldelta:
544 if not self._generaldelta:
541 self._sparserevlog = False
545 self._sparserevlog = False
542
546
543 self._storedeltachains = True
547 self._storedeltachains = True
544
548
545 self._io = revlogio()
549 self._io = revlogio()
546 if self.version == REVLOGV0:
550 if self.version == REVLOGV0:
547 self._io = revlogoldio()
551 self._io = revlogoldio()
548 try:
552 try:
549 d = self._io.parseindex(indexdata, self._inline)
553 d = self._io.parseindex(indexdata, self._inline)
550 except (ValueError, IndexError):
554 except (ValueError, IndexError):
551 raise error.RevlogError(
555 raise error.RevlogError(
552 _(b"index %s is corrupted") % self.indexfile
556 _(b"index %s is corrupted") % self.indexfile
553 )
557 )
554 self.index, self._chunkcache = d
558 self.index, self._chunkcache = d
555 self.nodemap = self.index.nodemap
559 self.nodemap = self.index.nodemap
556 if not self._chunkcache:
560 if not self._chunkcache:
557 self._chunkclear()
561 self._chunkclear()
558 # revnum -> (chain-length, sum-delta-length)
562 # revnum -> (chain-length, sum-delta-length)
559 self._chaininfocache = {}
563 self._chaininfocache = {}
560 # revlog header -> revlog compressor
564 # revlog header -> revlog compressor
561 self._decompressors = {}
565 self._decompressors = {}
562
566
563 @util.propertycache
567 @util.propertycache
564 def _compressor(self):
568 def _compressor(self):
565 engine = util.compengines[self._compengine]
569 engine = util.compengines[self._compengine]
566 return engine.revlogcompressor(self._compengineopts)
570 return engine.revlogcompressor(self._compengineopts)
567
571
568 def _indexfp(self, mode=b'r'):
572 def _indexfp(self, mode=b'r'):
569 """file object for the revlog's index file"""
573 """file object for the revlog's index file"""
570 args = {'mode': mode}
574 args = {'mode': mode}
571 if mode != b'r':
575 if mode != b'r':
572 args['checkambig'] = self._checkambig
576 args['checkambig'] = self._checkambig
573 if mode == b'w':
577 if mode == b'w':
574 args['atomictemp'] = True
578 args['atomictemp'] = True
575 return self.opener(self.indexfile, **args)
579 return self.opener(self.indexfile, **args)
576
580
577 def _datafp(self, mode=b'r'):
581 def _datafp(self, mode=b'r'):
578 """file object for the revlog's data file"""
582 """file object for the revlog's data file"""
579 return self.opener(self.datafile, mode=mode)
583 return self.opener(self.datafile, mode=mode)
580
584
581 @contextlib.contextmanager
585 @contextlib.contextmanager
582 def _datareadfp(self, existingfp=None):
586 def _datareadfp(self, existingfp=None):
583 """file object suitable to read data"""
587 """file object suitable to read data"""
584 # Use explicit file handle, if given.
588 # Use explicit file handle, if given.
585 if existingfp is not None:
589 if existingfp is not None:
586 yield existingfp
590 yield existingfp
587
591
588 # Use a file handle being actively used for writes, if available.
592 # Use a file handle being actively used for writes, if available.
589 # There is some danger to doing this because reads will seek the
593 # There is some danger to doing this because reads will seek the
590 # file. However, _writeentry() performs a SEEK_END before all writes,
594 # file. However, _writeentry() performs a SEEK_END before all writes,
591 # so we should be safe.
595 # so we should be safe.
592 elif self._writinghandles:
596 elif self._writinghandles:
593 if self._inline:
597 if self._inline:
594 yield self._writinghandles[0]
598 yield self._writinghandles[0]
595 else:
599 else:
596 yield self._writinghandles[1]
600 yield self._writinghandles[1]
597
601
598 # Otherwise open a new file handle.
602 # Otherwise open a new file handle.
599 else:
603 else:
600 if self._inline:
604 if self._inline:
601 func = self._indexfp
605 func = self._indexfp
602 else:
606 else:
603 func = self._datafp
607 func = self._datafp
604 with func() as fp:
608 with func() as fp:
605 yield fp
609 yield fp
606
610
607 def tiprev(self):
611 def tiprev(self):
608 return len(self.index) - 1
612 return len(self.index) - 1
609
613
610 def tip(self):
614 def tip(self):
611 return self.node(self.tiprev())
615 return self.node(self.tiprev())
612
616
613 def __contains__(self, rev):
617 def __contains__(self, rev):
614 return 0 <= rev < len(self)
618 return 0 <= rev < len(self)
615
619
616 def __len__(self):
620 def __len__(self):
617 return len(self.index)
621 return len(self.index)
618
622
619 def __iter__(self):
623 def __iter__(self):
620 return iter(pycompat.xrange(len(self)))
624 return iter(pycompat.xrange(len(self)))
621
625
622 def revs(self, start=0, stop=None):
626 def revs(self, start=0, stop=None):
623 """iterate over all rev in this revlog (from start to stop)"""
627 """iterate over all rev in this revlog (from start to stop)"""
624 return storageutil.iterrevs(len(self), start=start, stop=stop)
628 return storageutil.iterrevs(len(self), start=start, stop=stop)
625
629
626 @util.propertycache
630 @util.propertycache
627 def nodemap(self):
631 def nodemap(self):
628 if self.index:
632 if self.index:
629 # populate mapping down to the initial node
633 # populate mapping down to the initial node
630 node0 = self.index[0][7] # get around changelog filtering
634 node0 = self.index[0][7] # get around changelog filtering
631 self.rev(node0)
635 self.rev(node0)
632 return self.index.nodemap
636 return self.index.nodemap
633
637
634 @property
638 @property
635 def _nodecache(self):
639 def _nodecache(self):
636 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
640 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
637 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
641 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
638 return self.index.nodemap
642 return self.index.nodemap
639
643
640 def hasnode(self, node):
644 def hasnode(self, node):
641 try:
645 try:
642 self.rev(node)
646 self.rev(node)
643 return True
647 return True
644 except KeyError:
648 except KeyError:
645 return False
649 return False
646
650
647 def candelta(self, baserev, rev):
651 def candelta(self, baserev, rev):
648 """whether two revisions (baserev, rev) can be delta-ed or not"""
652 """whether two revisions (baserev, rev) can be delta-ed or not"""
649 # Disable delta if either rev requires a content-changing flag
653 # Disable delta if either rev requires a content-changing flag
650 # processor (ex. LFS). This is because such flag processor can alter
654 # processor (ex. LFS). This is because such flag processor can alter
651 # the rawtext content that the delta will be based on, and two clients
655 # the rawtext content that the delta will be based on, and two clients
652 # could have a same revlog node with different flags (i.e. different
656 # could have a same revlog node with different flags (i.e. different
653 # rawtext contents) and the delta could be incompatible.
657 # rawtext contents) and the delta could be incompatible.
654 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
658 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
655 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
659 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
656 ):
660 ):
657 return False
661 return False
658 return True
662 return True
659
663
660 def clearcaches(self):
664 def clearcaches(self):
661 self._revisioncache = None
665 self._revisioncache = None
662 self._chainbasecache.clear()
666 self._chainbasecache.clear()
663 self._chunkcache = (0, b'')
667 self._chunkcache = (0, b'')
664 self._pcache = {}
668 self._pcache = {}
665 self.index.clearcaches()
669 self.index.clearcaches()
666
670
667 def rev(self, node):
671 def rev(self, node):
668 try:
672 try:
669 return self.index.nodemap[node]
673 return self.index.nodemap[node]
670 except TypeError:
674 except TypeError:
671 raise
675 raise
672 except error.RevlogError:
676 except error.RevlogError:
673 # parsers.c radix tree lookup failed
677 # parsers.c radix tree lookup failed
674 if node == wdirid or node in wdirfilenodeids:
678 if node == wdirid or node in wdirfilenodeids:
675 raise error.WdirUnsupported
679 raise error.WdirUnsupported
676 raise error.LookupError(node, self.indexfile, _(b'no node'))
680 raise error.LookupError(node, self.indexfile, _(b'no node'))
677
681
678 # Accessors for index entries.
682 # Accessors for index entries.
679
683
680 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
684 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
681 # are flags.
685 # are flags.
682 def start(self, rev):
686 def start(self, rev):
683 return int(self.index[rev][0] >> 16)
687 return int(self.index[rev][0] >> 16)
684
688
685 def flags(self, rev):
689 def flags(self, rev):
686 return self.index[rev][0] & 0xFFFF
690 return self.index[rev][0] & 0xFFFF
687
691
688 def length(self, rev):
692 def length(self, rev):
689 return self.index[rev][1]
693 return self.index[rev][1]
690
694
691 def rawsize(self, rev):
695 def rawsize(self, rev):
692 """return the length of the uncompressed text for a given revision"""
696 """return the length of the uncompressed text for a given revision"""
693 l = self.index[rev][2]
697 l = self.index[rev][2]
694 if l >= 0:
698 if l >= 0:
695 return l
699 return l
696
700
697 t = self.rawdata(rev)
701 t = self.rawdata(rev)
698 return len(t)
702 return len(t)
699
703
700 def size(self, rev):
704 def size(self, rev):
701 """length of non-raw text (processed by a "read" flag processor)"""
705 """length of non-raw text (processed by a "read" flag processor)"""
702 # fast path: if no "read" flag processor could change the content,
706 # fast path: if no "read" flag processor could change the content,
703 # size is rawsize. note: ELLIPSIS is known to not change the content.
707 # size is rawsize. note: ELLIPSIS is known to not change the content.
704 flags = self.flags(rev)
708 flags = self.flags(rev)
705 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
709 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
706 return self.rawsize(rev)
710 return self.rawsize(rev)
707
711
708 return len(self.revision(rev, raw=False))
712 return len(self.revision(rev, raw=False))
709
713
710 def chainbase(self, rev):
714 def chainbase(self, rev):
711 base = self._chainbasecache.get(rev)
715 base = self._chainbasecache.get(rev)
712 if base is not None:
716 if base is not None:
713 return base
717 return base
714
718
715 index = self.index
719 index = self.index
716 iterrev = rev
720 iterrev = rev
717 base = index[iterrev][3]
721 base = index[iterrev][3]
718 while base != iterrev:
722 while base != iterrev:
719 iterrev = base
723 iterrev = base
720 base = index[iterrev][3]
724 base = index[iterrev][3]
721
725
722 self._chainbasecache[rev] = base
726 self._chainbasecache[rev] = base
723 return base
727 return base
724
728
725 def linkrev(self, rev):
729 def linkrev(self, rev):
726 return self.index[rev][4]
730 return self.index[rev][4]
727
731
728 def parentrevs(self, rev):
732 def parentrevs(self, rev):
729 try:
733 try:
730 entry = self.index[rev]
734 entry = self.index[rev]
731 except IndexError:
735 except IndexError:
732 if rev == wdirrev:
736 if rev == wdirrev:
733 raise error.WdirUnsupported
737 raise error.WdirUnsupported
734 raise
738 raise
735
739
736 return entry[5], entry[6]
740 return entry[5], entry[6]
737
741
738 # fast parentrevs(rev) where rev isn't filtered
742 # fast parentrevs(rev) where rev isn't filtered
739 _uncheckedparentrevs = parentrevs
743 _uncheckedparentrevs = parentrevs
740
744
741 def node(self, rev):
745 def node(self, rev):
742 try:
746 try:
743 return self.index[rev][7]
747 return self.index[rev][7]
744 except IndexError:
748 except IndexError:
745 if rev == wdirrev:
749 if rev == wdirrev:
746 raise error.WdirUnsupported
750 raise error.WdirUnsupported
747 raise
751 raise
748
752
749 # Derived from index values.
753 # Derived from index values.
750
754
751 def end(self, rev):
755 def end(self, rev):
752 return self.start(rev) + self.length(rev)
756 return self.start(rev) + self.length(rev)
753
757
754 def parents(self, node):
758 def parents(self, node):
755 i = self.index
759 i = self.index
756 d = i[self.rev(node)]
760 d = i[self.rev(node)]
757 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
761 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
758
762
759 def chainlen(self, rev):
763 def chainlen(self, rev):
760 return self._chaininfo(rev)[0]
764 return self._chaininfo(rev)[0]
761
765
762 def _chaininfo(self, rev):
766 def _chaininfo(self, rev):
763 chaininfocache = self._chaininfocache
767 chaininfocache = self._chaininfocache
764 if rev in chaininfocache:
768 if rev in chaininfocache:
765 return chaininfocache[rev]
769 return chaininfocache[rev]
766 index = self.index
770 index = self.index
767 generaldelta = self._generaldelta
771 generaldelta = self._generaldelta
768 iterrev = rev
772 iterrev = rev
769 e = index[iterrev]
773 e = index[iterrev]
770 clen = 0
774 clen = 0
771 compresseddeltalen = 0
775 compresseddeltalen = 0
772 while iterrev != e[3]:
776 while iterrev != e[3]:
773 clen += 1
777 clen += 1
774 compresseddeltalen += e[1]
778 compresseddeltalen += e[1]
775 if generaldelta:
779 if generaldelta:
776 iterrev = e[3]
780 iterrev = e[3]
777 else:
781 else:
778 iterrev -= 1
782 iterrev -= 1
779 if iterrev in chaininfocache:
783 if iterrev in chaininfocache:
780 t = chaininfocache[iterrev]
784 t = chaininfocache[iterrev]
781 clen += t[0]
785 clen += t[0]
782 compresseddeltalen += t[1]
786 compresseddeltalen += t[1]
783 break
787 break
784 e = index[iterrev]
788 e = index[iterrev]
785 else:
789 else:
786 # Add text length of base since decompressing that also takes
790 # Add text length of base since decompressing that also takes
787 # work. For cache hits the length is already included.
791 # work. For cache hits the length is already included.
788 compresseddeltalen += e[1]
792 compresseddeltalen += e[1]
789 r = (clen, compresseddeltalen)
793 r = (clen, compresseddeltalen)
790 chaininfocache[rev] = r
794 chaininfocache[rev] = r
791 return r
795 return r
792
796
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

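    # A delta chain is the list of revisions whose data must be combined,
    # base first, to rebuild ``rev``. Sketch, assuming a hypothetical revlog
    # instance ``rl`` (illustration only, kept as a comment):
    #
    #   chain, stopped = rl._deltachain(rev)
    #   # chain[-1] == rev; chain[0] is the chain's base unless ``stopped``
    #   chain, stopped = rl._deltachain(rev, stoprev=base)
    #   # ``stopped`` is True when the walk hit ``base`` before the real base
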
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, b'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

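    # Usage sketch, assuming a hypothetical revlog instance ``rl``
    # (illustration only, kept as a comment):
    #
    #   for anc in rl.ancestors([rev], stoprev=0, inclusive=True):
    #       ...  # revs are generated in reverse revision order, rev included
    #
    # With the default ``inclusive=False`` the starting revs themselves are
    # not yielded.
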
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

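    # Usage sketch, assuming a hypothetical revlog instance ``rl`` and node
    # lists ``common`` and ``heads`` (illustration only, kept as a comment):
    #
    #   has, missing = rl.findcommonmissing(common, heads)
    #   # ``has`` supports lazy membership tests over ::common (plus nullrev);
    #   # ``missing`` is the topologically sorted node list of
    #   # (::heads) - (::common).
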
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

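    # The three discovery helpers differ mainly in their value types:
    # findmissingrevs() takes and returns revision numbers, findmissing()
    # takes and returns node ids, and both delegate the graph work to
    # incrementalmissingrevs(). Sketch, assuming a hypothetical revlog
    # instance ``rl`` (illustration only, kept as a comment):
    #
    #   inc = rl.incrementalmissingrevs(common=[commonrev])
    #   missingrevs = inc.missingancestors(rl.headrevs())
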
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

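    # Usage sketch, assuming a hypothetical revlog instance ``rl`` and nodes
    # ``rootnode`` and ``headnode`` (illustration only, kept as a comment):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([rootnode], [headnode])
    #   # ``nodes`` is the topologically sorted slice rootnode::headnode;
    #   # ``outroots``/``outheads`` are the members of the input lists that
    #   # actually bound that slice.
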
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

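    # The pure-Python fallback marks every rev as a candidate head, then
    # clears the mark on each rev's parents, so a single linear pass suffices.
    # Toy transcription of the idea (illustration only, kept as a comment):
    #
    #   parentrevs = [(-1, -1), (0, -1), (0, -1)]  # revs 1 and 2 branch off 0
    #   ishead = [1] * len(parentrevs)
    #   for r, ps in enumerate(parentrevs):
    #       for p in ps:
    #           if p >= 0:
    #               ishead[p] = 0
    #   # [r for r, v in enumerate(ishead) if v] == [1, 2]
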
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

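    # ``isancestorrev`` exploits the revlog invariant that a parent always
    # has a smaller revision number than its children: ``a > b`` can never
    # mean "a is an ancestor of b", so only the remaining case needs a graph
    # walk via reachableroots. Sketch, assuming a hypothetical revlog
    # instance ``rl`` (illustration only, kept as a comment):
    #
    #   rl.isancestorrev(a, b)  # True iff a is in ::b, i.e. a == b or a is
    #                           # reachable by walking b's parents
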
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

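    # Usage sketch, assuming a hypothetical revlog instance ``rl``
    # (illustration only, kept as a comment):
    #
    #   prefix = rl.shortest(node, minlength=4)
    #   # shortest hex prefix of at least 4 digits that unambiguously
    #   # resolves to ``node``, lengthened when it could also be read as
    #   # the all-'f' wdir id
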
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

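    # The window arithmetic in _readsegment assumes ``_chunkcachesize`` is a
    # power of two, so masking with ``cachesize - 1`` rounds down to a window
    # boundary. Worked example with a 64 KiB window (illustration only, kept
    # as a comment):
    #
    #   cachesize = 65536
    #   offset, length = 70000, 1000
    #   realoffset = offset & ~(cachesize - 1)              # 65536
    #   reallength = ((offset + length + cachesize)
    #                 & ~(cachesize - 1)) - realoffset      # 65536
    #   # a single aligned 64 KiB read covers bytes 70000..70999
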
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

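    # For inline revlogs the index and data are interleaved in one file: the
    # data of revision ``rev`` is preceded by ``rev + 1`` index entries of
    # ``self._io.size`` bytes each, so logical data offsets must be shifted
    # accordingly. Sketch of the adjustment as applied above (illustration
    # only, kept as a comment):
    #
    #   start = int(index[startrev][0] >> 16)      # logical offset
    #   if rl._inline:
    #       start += (startrev + 1) * rl._io.size  # skip interleaved entries
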
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

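    # In the pure-Python sparse-revlog path, a revision is a snapshot when it
    # is stored as a full text (base == rev or base == nullrev) or when its
    # delta base is not one of its parents and is itself a snapshot; a delta
    # against a parent is an ordinary delta, not a snapshot. Sketch, assuming
    # a hypothetical revlog instance ``rl`` (illustration only, kept as a
    # comment):
    #
    #   rl.issnapshot(rev)  # True for full texts and intermediate snapshots
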
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

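# Illustrative sketch (hypothetical helper, not part of revlog): how a raw
# text is rebuilt from a delta chain, as _rawtext does above. `patch` is a
# stand-in for mdiff.patches; a chain starts with a full base text (or a
# cached intermediate one) and folds each subsequent binary delta into it.
def _toy_apply_chain(chunks, patch):
    # chunks[0] is the full base text, chunks[1:] are binary deltas
    basetext = bytes(chunks[0])
    return patch(basetext, chunks[1:])
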
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

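# Sketch of the default node hash, shown for illustration only (the real
# implementation lives in mercurial.utils.storageutil.hashrevisionsha1):
# SHA-1 over the two parent node ids in sorted order, followed by the
# revision text. `_toy_hashrevision` is a hypothetical name.
import hashlib

def _toy_hashrevision(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))  # parents are sorted so order is canonical
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()
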
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

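# Minimal sketch (hypothetical names and threshold): the inline/split decision
# above. An inline revlog interleaves data chunks with index entries in one
# .i file; once the end of the tip revision's data passes a size threshold,
# the revlog is rewritten as a separate .i index plus a .d data file. The
# constant below only assumes the threshold is on the order of 128 KiB.
_TOY_MAXINLINE = 131072

def _toy_should_split(is_inline, tip_start, tip_length):
    return is_inline and (tip_start + tip_length) >= _TOY_MAXINLINE
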
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

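# Illustrative sketch (hypothetical helpers, standard zlib only): the chunk
# header convention used by compress()/decompress() above. zlib output is
# self-identifying (it starts with b'x'), b'u' marks data stored verbatim,
# and a leading NUL byte is passed through untouched in both directions.
import zlib

def _toy_compress(data):
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return compressed  # self-identifying: starts with b'x'
    if data[0:1] == b'\0':
        return data  # NUL-led data needs no marker
    return b'u' + data  # explicit "uncompressed" marker

def _toy_decompress(chunk):
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression type %r' % t)

# Round trip: _toy_decompress(_toy_compress(data)) == data for any bytes.
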
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

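# Illustrative sketch (toy values, hypothetical helper): the 8-tuple appended
# to the index above. The first field packs the data-file offset together
# with the revision flags, following the revlog convention of shifting the
# offset left by 16 bits to leave room for the flag bits.
def _toy_index_entry(offset, flags, deltalen, textlen, base, link, p1r, p2r, node):
    return (offset << 16 | flags, deltalen, textlen, base, link, p1r, p2r, node)

# Example: a 120-byte delta of a 300-byte text at data offset 4096, no flags,
# delta base revision 2, linkrev 7, one real parent and a null parent:
# _toy_index_entry(4096, 0, 120, 300, 2, 7, 5, -1, b'\x00' * 20)
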
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

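# Minimal sketch (hypothetical helper): the a+ workaround described in the
# comments above. After a handle has been used for reads, an explicit seek to
# EOF before writing keeps platforms that mishandle read-to-write transitions
# from corrupting the append.
import os

def _toy_append(fh, payload):
    fh.seek(0, os.SEEK_END)  # defensive: guarantee we are really at EOF
    fh.write(payload)
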
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if deltabase not in self.nodemap:
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

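# Illustrative sketch (toy structures, hypothetical helper): the per-delta
# validation addgroup() performs before storing anything. Duplicates are
# skipped silently, while both parents and the delta base must already be
# known to the revlog's nodemap.
def _toy_validate_delta(nodemap, node, p1, p2, deltabase):
    if node in nodemap:
        return False  # duplicate; two branches made the same change
    for p in (p1, p2):
        if p not in nodemap:
            raise LookupError('unknown parent %r' % p)
    if deltabase not in nodemap:
        raise LookupError('unknown delta base %r' % deltabase)
    return True  # safe to apply and append
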
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

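# Minimal sketch (toy linkrevs, hypothetical helper): what a strip point
# means. Given per-revision linkrevs, the strip point is the first revision
# whose linkrev is >= minlink; everything from there to the tip must go,
# because changelog revisions are renumbered by the strip. This simplifies
# the real helper, which also reports the set of broken linkrevs.
def _toy_getstrippoint(linkrevs, minlink):
    for rev, linkrev in enumerate(linkrevs):
        if linkrev >= minlink:
            return rev
    return len(linkrevs)

# _toy_getstrippoint([0, 1, 1, 3, 4], 3) == 3
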
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
                dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

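# Minimal sketch (toy sizes, hypothetical helper): the dd/di computation
# above for the non-inline case. dd is trailing garbage past the last byte
# the index says the data file should contain; di is whatever is left over
# after the index file is cut into fixed-size entries.
def _toy_checksize(datasize, expected_data_end, indexsize, entrysize):
    dd = datasize - expected_data_end
    di = indexsize - (indexsize // entrysize) * entrysize
    return dd, di

# _toy_checksize(1000, 1000, 640, 64) == (0, 0)   # a healthy revlog
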
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default (None), the destination revlog's current
        setting is used.

        If not None, `sidedatacompanion` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
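            # An index entry is an 8-tuple:
            # (offset_flags, compressed_len, uncompressed_len, base_rev,
            #  link_rev, p1_rev, p2_rev, node); offset_flags packs the data
            # offset in the high bits and the flags in the low 16 bits.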
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
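        # packmeta stores the tombstone inside the b'\x01\n...\x01\n'
        # metadata envelope used for filelog metadata, which is how the
        # censored marker survives in the raw data.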

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
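        # (The swap happens below via opener.rename; tr.addbackup keeps the
        # original files recoverable if the transaction rolls back.)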

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state[b'skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
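        # Callers request only the statistics they need; e.g. (sketch, with
        # `rl` an existing revlog instance):
        #
        #   info = rl.storageinfo(revisionscount=True, storedsize=True)
        #   total = info[b'storedsize']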
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d