vfs: give all vfs an options attribute by default...
marmoute - r43295:3518da50 default
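The change gives every vfs an `options` attribute by default (an empty
dict when nothing was configured), so consumers can read it without
guarding the lookup. The two revlog.py hunks below reduce to this
before/after pattern:

    # Before: defend against openers that lack the attribute.
    opts = getattr(self.opener, 'options', {}) or {}

    # After: the vfs layer now guarantees the attribute exists.
    opts = self.opener.options

The vfs.py side of the changeset is not quoted in this view; as a
hedged sketch, it amounts to declaring an `options = {}` default on the
base vfs class rather than leaving the attribute optional.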
@@ -1,43 +1,44 @@
 #!/usr/bin/env python
 # Dump revlogs as raw data stream
 # $ find .hg/store/ -name "*.i" | xargs dumprevlog > repo.dump

 from __future__ import absolute_import, print_function

 import sys
 from mercurial import (
     encoding,
     node,
     pycompat,
     revlog,
 )
 from mercurial.utils import (
     procutil,
 )

 for fp in (sys.stdin, sys.stdout, sys.stderr):
     procutil.setbinary(fp)

 def binopen(path, mode=b'rb'):
     if b'b' not in mode:
         mode = mode + b'b'
     return open(path, pycompat.sysstr(mode))
+binopen.options = {}

 def printb(data, end=b'\n'):
     sys.stdout.flush()
     pycompat.stdout.write(data + end)

 for f in sys.argv[1:]:
     r = revlog.revlog(binopen, encoding.strtolocal(f))
     print("file:", f)
     for i in r:
         n = r.node(i)
         p = r.parents(n)
         d = r.revision(n)
         printb(b"node: %s" % node.hex(n))
         printb(b"linkrev: %d" % r.linkrev(i))
         printb(b"parents: %s %s" % (node.hex(p[0]), node.hex(p[1])))
         printb(b"length: %d" % len(d))
         printb(b"-start-")
         printb(d)
         printb(b"-end-")
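Note on the one added line: plain functions passed as openers get no
attribute from the vfs layer, so the script has to supply `options`
itself. A sketch of the failure this avoids (names illustrative), had
the script run unchanged against the new _loadindex():

    # A bare function has no 'options' attribute, so the direct
    # 'self.opener.options' lookup in _loadindex() would raise
    # AttributeError instead of quietly falling back to an empty dict.
    def rawopen(path, mode=b'rb'):
        return open(path, pycompat.sysstr(mode))

    revlog.revlog(rawopen, b'00changelog.i')  # AttributeError: options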
@@ -1,2660 +1,2660 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """

 from __future__ import absolute_import

 import collections
 import contextlib
 import errno
 import io
 import os
 import struct
 import zlib

 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullhex,
     nullid,
     nullrev,
     short,
     wdirfilenodeids,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from .revlogutils.constants import (
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )
 from .thirdparty import (
     attr,
 )
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     templatefilters,
     util,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     deltas as deltautil,
     flagutil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )

 # blanked usage of all the name to prevent pyflakes constraints
 # We need these name available in the module for extensions.
 REVLOGV0
 REVLOGV1
 REVLOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS

 parsers = policy.importmod(r'parsers')
 rustancestor = policy.importrust(r'ancestor')
 rustdagop = policy.importrust(r'dagop')

 # Aliased for performance.
 _zlibdecompress = zlib.decompress

 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576

 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False, {}

 def ellipsiswriteprocessor(rl, text, sidedata):
     return text, False

 def ellipsisrawprocessor(rl, text):
     return False

 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )

 def getoffset(q):
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError('unknown revlog index flags')
     return int(int(offset) << 16 | type)

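The three helpers above define how an index entry's first field is laid
out: `offset_type` packs a byte offset and a 16-bit flag word into a
single integer, and `getoffset`/`gettype` split it apart again. A quick
worked example (values illustrative):

    packed = offset_type(1024, 0)     # 1024 << 16 == 0x4000000
    assert getoffset(packed) == 1024  # upper bits: byte offset
    assert gettype(packed) == 0       # low 16 bits: storage flags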
 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
     node: expected hash of the revision
     p1, p2: parent revs of the revision
     btext: built text cache consisting of a one-element list
     cachedelta: (baserev, uncompressed_delta) or None
     flags: flags associated to the revision storage

     One of btext[0] or cachedelta must be set.
     """
     node = attr.ib()
     p1 = attr.ib()
     p2 = attr.ib()
     btext = attr.ib()
     textlen = attr.ib()
     cachedelta = attr.ib()
     flags = attr.ib()

 @interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
 class revlogrevisiondelta(object):
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     linknode = attr.ib(default=None)

 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
 class revlogproblem(object):
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)

 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack

 class revlogoldindex(list):
     def __getitem__(self, i):
         if i == -1:
             return (0, 0, 0, -1, -1, -1, -1, nullid)
         return list.__getitem__(self, i)

 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size

     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1

         return revlogoldindex(index), nodemap, None

     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise error.RevlogError(_('index entry flags need revlog '
                                       'version 1'))
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return indexformatv0_pack(*e2)

 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack

 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7fffffff

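The struct formats above pin down the on-disk entry sizes: a v0 entry
is four 4-byte integers plus three 20-byte hashes, and a v1 ("ng")
entry is an 8-byte packed offset/flags word, six 4-byte integers, a
20-byte hash, and 12 pad bytes. A sketch that checks the arithmetic:

    import struct
    assert struct.Struct(">4l20s20s20s").size == 4 * 4 + 3 * 20         # 76
    assert struct.Struct(">Qiiiiii20s12x").size == 8 + 6 * 4 + 20 + 12  # 64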
 class revlogio(object):
     def __init__(self):
         self.size = indexformatng.size

     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
         return index, getattr(index, 'nodemap', None), cache

     def packentry(self, entry, node, version, rev):
         p = indexformatng_pack(*entry)
         if rev == 0:
             p = versionformat_pack(version) + p[4:]
         return p

 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.

     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.

     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.

     If censorable is True, the revlog can have censored revisions.

     If `upperboundcomp` is not None, this is the expected maximal gain from
     compression for the data content.
     """

     _flagserrorclass = error.RevlogError

     def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                  mmaplargeindex=False, censorable=False,
                  upperboundcomp=None):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.

         """
         self.upperboundcomp = upperboundcomp
         self.indexfile = indexfile
         self.datafile = datafile or (indexfile[:-2] + ".d")
         self.opener = opener
         # When True, indexfile is opened with checkambig=True at writing, to
         # avoid file stat ambiguity.
         self._checkambig = checkambig
         self._mmaplargeindex = mmaplargeindex
         self._censorable = censorable
         # 3-tuple of (node, rev, text) for a raw revision.
         self._revisioncache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, '')
         # How much data to read and cache into the raw revlog data cache.
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._deltabothparents = True
         self.index = []
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
         # Mapping of revision integer to full node.
         self._nodecache = {nullid: nullrev}
         self._nodepos = None
         self._compengine = 'zlib'
         self._compengineopts = {}
         self._maxdeltachainspan = -1
         self._withsparseread = False
         self._sparserevlog = False
         self._srdensitythreshold = 0.50
         self._srmingapsize = 262144

         # Make copy of flag processors so each revlog instance can support
         # custom flags.
         self._flagprocessors = dict(flagutil.flagprocessors)

         # 2-tuple of file handles being used for active writing.
         self._writinghandles = None

         self._loadindex()

     def _loadindex(self):
         mmapindexthreshold = None
-        opts = getattr(self.opener, 'options', {}) or {}
+        opts = self.opener.options

         if 'revlogv2' in opts:
             newversionflags = REVLOGV2 | FLAG_INLINE_DATA
         elif 'revlogv1' in opts:
             newversionflags = REVLOGV1 | FLAG_INLINE_DATA
             if 'generaldelta' in opts:
                 newversionflags |= FLAG_GENERALDELTA
-        elif 'revlogv0' in getattr(self.opener, 'options', {}):
+        elif 'revlogv0' in self.opener.options:
             newversionflags = REVLOGV0
         else:
             newversionflags = REVLOG_DEFAULT_VERSION

         if 'chunkcachesize' in opts:
             self._chunkcachesize = opts['chunkcachesize']
         if 'maxchainlen' in opts:
             self._maxchainlen = opts['maxchainlen']
         if 'deltabothparents' in opts:
             self._deltabothparents = opts['deltabothparents']
         self._lazydelta = bool(opts.get('lazydelta', True))
         self._lazydeltabase = False
         if self._lazydelta:
             self._lazydeltabase = bool(opts.get('lazydeltabase', False))
         if 'compengine' in opts:
             self._compengine = opts['compengine']
         if 'zlib.level' in opts:
             self._compengineopts['zlib.level'] = opts['zlib.level']
         if 'zstd.level' in opts:
             self._compengineopts['zstd.level'] = opts['zstd.level']
         if 'maxdeltachainspan' in opts:
             self._maxdeltachainspan = opts['maxdeltachainspan']
         if self._mmaplargeindex and 'mmapindexthreshold' in opts:
             mmapindexthreshold = opts['mmapindexthreshold']
         self._sparserevlog = bool(opts.get('sparse-revlog', False))
         withsparseread = bool(opts.get('with-sparse-read', False))
         # sparse-revlog forces sparse-read
         self._withsparseread = self._sparserevlog or withsparseread
         if 'sparse-read-density-threshold' in opts:
             self._srdensitythreshold = opts['sparse-read-density-threshold']
         if 'sparse-read-min-gap-size' in opts:
             self._srmingapsize = opts['sparse-read-min-gap-size']
         if opts.get('enableellipsis'):
             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

         # revlog v0 doesn't have flag processors
         for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
             flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

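_loadindex() is where the opener's options dict is actually consumed;
each key above tunes one of the attributes initialized in __init__. A
sketch of a hand-built opener for standalone use, with illustrative
values (key spelling as in the hunk above):

    def binopen(path, mode=b'rb'):
        if b'b' not in mode:
            mode = mode + b'b'
        return open(path, pycompat.sysstr(mode))
    binopen.options = {
        'revlogv1': True,         # REVLOGV1 | FLAG_INLINE_DATA
        'generaldelta': True,     # adds FLAG_GENERALDELTA
        'chunkcachesize': 65536,  # must be a power of two (checked below)
        'sparse-revlog': True,    # also forces with-sparse-read
    }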
         if self._chunkcachesize <= 0:
             raise error.RevlogError(_('revlog chunk cache size %r is not '
                                       'greater than 0') % self._chunkcachesize)
         elif self._chunkcachesize & (self._chunkcachesize - 1):
             raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                       'power of 2') % self._chunkcachesize)

         indexdata = ''
         self._initempty = True
         try:
             with self._indexfp() as f:
                 if (mmapindexthreshold is not None and
                         self.opener.fstat(f).st_size >= mmapindexthreshold):
                     # TODO: should .close() to release resources without
                     # relying on Python GC
                     indexdata = util.buffer(util.mmapread(f))
                 else:
                     indexdata = f.read()
             if len(indexdata) > 0:
                 versionflags = versionformat_unpack(indexdata[:4])[0]
                 self._initempty = False
             else:
                 versionflags = newversionflags
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise

             versionflags = newversionflags

         self.version = versionflags

         flags = versionflags & ~0xFFFF
         fmt = versionflags & 0xFFFF

         if fmt == REVLOGV0:
             if flags:
                 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                           'revlog %s') %
                                         (flags >> 16, fmt, self.indexfile))

             self._inline = False
             self._generaldelta = False

         elif fmt == REVLOGV1:
             if flags & ~REVLOGV1_FLAGS:
                 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                           'revlog %s') %
                                         (flags >> 16, fmt, self.indexfile))

             self._inline = versionflags & FLAG_INLINE_DATA
             self._generaldelta = versionflags & FLAG_GENERALDELTA

         elif fmt == REVLOGV2:
             if flags & ~REVLOGV2_FLAGS:
                 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                           'revlog %s') %
                                         (flags >> 16, fmt, self.indexfile))

             self._inline = versionflags & FLAG_INLINE_DATA
             # generaldelta implied by version 2 revlogs.
             self._generaldelta = True

         else:
             raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                     (fmt, self.indexfile))
         # sparse-revlog can't be on without general-delta (issue6056)
         if not self._generaldelta:
             self._sparserevlog = False

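The header word read from the first four index bytes mixes the format
number and the feature flags: the low 16 bits are the format (`fmt`),
everything above is flags. A worked example, assuming the usual
constant values (format in the low word, flags at bit 16 and up):

    versionflags = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    fmt = versionflags & 0xFFFF     # == REVLOGV1
    flags = versionflags & ~0xFFFF  # == FLAG_INLINE_DATA | FLAG_GENERALDELTA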
         self._storedeltachains = True

         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         try:
             d = self._io.parseindex(indexdata, self._inline)
         except (ValueError, IndexError):
             raise error.RevlogError(_("index %s is corrupted") %
                                     self.indexfile)
         self.index, nodemap, self._chunkcache = d
         if nodemap is not None:
             self.nodemap = self._nodecache = nodemap
         if not self._chunkcache:
             self._chunkclear()
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = {}
         # revlog header -> revlog compressor
         self._decompressors = {}

     @util.propertycache
     def _compressor(self):
         engine = util.compengines[self._compengine]
         return engine.revlogcompressor(self._compengineopts)

     def _indexfp(self, mode='r'):
         """file object for the revlog's index file"""
         args = {r'mode': mode}
         if mode != 'r':
             args[r'checkambig'] = self._checkambig
         if mode == 'w':
             args[r'atomictemp'] = True
         return self.opener(self.indexfile, **args)

     def _datafp(self, mode='r'):
         """file object for the revlog's data file"""
         return self.opener(self.datafile, mode=mode)

     @contextlib.contextmanager
     def _datareadfp(self, existingfp=None):
         """file object suitable to read data"""
         # Use explicit file handle, if given.
         if existingfp is not None:
             yield existingfp

         # Use a file handle being actively used for writes, if available.
         # There is some danger to doing this because reads will seek the
         # file. However, _writeentry() performs a SEEK_END before all writes,
         # so we should be safe.
         elif self._writinghandles:
             if self._inline:
                 yield self._writinghandles[0]
             else:
                 yield self._writinghandles[1]

         # Otherwise open a new file handle.
         else:
             if self._inline:
                 func = self._indexfp
             else:
                 func = self._datafp
             with func() as fp:
                 yield fp

     def tip(self):
         return self.node(len(self.index) - 1)
     def __contains__(self, rev):
         return 0 <= rev < len(self)
     def __len__(self):
         return len(self.index)
     def __iter__(self):
         return iter(pycompat.xrange(len(self)))
     def revs(self, start=0, stop=None):
         """iterate over all rev in this revlog (from start to stop)"""
         return storageutil.iterrevs(len(self), start=start, stop=stop)

     @util.propertycache
     def nodemap(self):
         if self.index:
             # populate mapping down to the initial node
             node0 = self.index[0][7] # get around changelog filtering
             self.rev(node0)
         return self._nodecache

     def hasnode(self, node):
         try:
             self.rev(node)
             return True
         except KeyError:
             return False

     def candelta(self, baserev, rev):
         """whether two revisions (baserev, rev) can be delta-ed or not"""
         # Disable delta if either rev requires a content-changing flag
         # processor (ex. LFS). This is because such flag processor can alter
         # the rawtext content that the delta will be based on, and two clients
         # could have a same revlog node with different flags (i.e. different
         # rawtext contents) and the delta could be incompatible.
         if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
             or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
             return False
         return True

     def clearcaches(self):
         self._revisioncache = None
         self._chainbasecache.clear()
         self._chunkcache = (0, '')
         self._pcache = {}

         try:
             # If we are using the native C version, you are in a fun case
             # where self.index, self.nodemap and self._nodecaches is the same
             # object.
             self._nodecache.clearcaches()
         except AttributeError:
             self._nodecache = {nullid: nullrev}
             self._nodepos = None

     def rev(self, node):
         try:
             return self._nodecache[node]
         except TypeError:
             raise
         except error.RevlogError:
             # parsers.c radix tree lookup failed
             if node == wdirid or node in wdirfilenodeids:
                 raise error.WdirUnsupported
             raise error.LookupError(node, self.indexfile, _('no node'))
         except KeyError:
             # pure python cache lookup failed
             n = self._nodecache
             i = self.index
             p = self._nodepos
             if p is None:
                 p = len(i) - 1
             else:
                 assert p < len(i)
             for r in pycompat.xrange(p, -1, -1):
                 v = i[r][7]
                 n[v] = r
                 if v == node:
                     self._nodepos = r - 1
                     return r
             if node == wdirid or node in wdirfilenodeids:
                 raise error.WdirUnsupported
             raise error.LookupError(node, self.indexfile, _('no node'))

     # Accessors for index entries.

     # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
     # are flags.
     def start(self, rev):
         return int(self.index[rev][0] >> 16)

     def flags(self, rev):
         return self.index[rev][0] & 0xFFFF

     def length(self, rev):
         return self.index[rev][1]

     def rawsize(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = self.index[rev][2]
         if l >= 0:
             return l

         t = self.rawdata(rev)
         return len(t)

     def size(self, rev):
         """length of non-raw text (processed by a "read" flag processor)"""
         # fast path: if no "read" flag processor could change the content,
         # size is rawsize. note: ELLIPSIS is known to not change the content.
         flags = self.flags(rev)
         if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
             return self.rawsize(rev)

         return len(self.revision(rev, raw=False))

     def chainbase(self, rev):
         base = self._chainbasecache.get(rev)
         if base is not None:
             return base

         index = self.index
         iterrev = rev
         base = index[iterrev][3]
         while base != iterrev:
             iterrev = base
             base = index[iterrev][3]

         self._chainbasecache[rev] = base
         return base

     def linkrev(self, rev):
         return self.index[rev][4]

     def parentrevs(self, rev):
         try:
             entry = self.index[rev]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise

         return entry[5], entry[6]

     # fast parentrevs(rev) where rev isn't filtered
     _uncheckedparentrevs = parentrevs

     def node(self, rev):
         try:
             return self.index[rev][7]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise

     # Derived from index values.

     def end(self, rev):
         return self.start(rev) + self.length(rev)

     def parents(self, node):
         i = self.index
         d = i[self.rev(node)]
         return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

     def chainlen(self, rev):
         return self._chaininfo(rev)[0]

     def _chaininfo(self, rev):
         chaininfocache = self._chaininfocache
         if rev in chaininfocache:
             return chaininfocache[rev]
         index = self.index
         generaldelta = self._generaldelta
         iterrev = rev
         e = index[iterrev]
         clen = 0
         compresseddeltalen = 0
         while iterrev != e[3]:
             clen += 1
             compresseddeltalen += e[1]
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             if iterrev in chaininfocache:
                 t = chaininfocache[iterrev]
                 clen += t[0]
                 compresseddeltalen += t[1]
                 break
             e = index[iterrev]
         else:
             # Add text length of base since decompressing that also takes
             # work. For cache hits the length is already included.
             compresseddeltalen += e[1]
         r = (clen, compresseddeltalen)
         chaininfocache[rev] = r
         return r

     def _deltachain(self, rev, stoprev=None):
         """Obtain the delta chain for a revision.

         ``stoprev`` specifies a revision to stop at. If not specified, we
         stop at the base of the chain.

         Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
         revs in ascending order and ``stopped`` is a bool indicating whether
         ``stoprev`` was hit.
         """
         # Try C implementation.
         try:
             return self.index.deltachain(rev, stoprev, self._generaldelta)
         except AttributeError:
             pass

         chain = []

         # Alias to prevent attribute lookup in tight loop.
         index = self.index
         generaldelta = self._generaldelta

         iterrev = rev
         e = index[iterrev]
         while iterrev != e[3] and iterrev != stoprev:
             chain.append(iterrev)
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             e = index[iterrev]

         if iterrev == stoprev:
             stopped = True
         else:
             chain.append(iterrev)
             stopped = False

         chain.reverse()
         return chain, stopped

777
777
778 def ancestors(self, revs, stoprev=0, inclusive=False):
778 def ancestors(self, revs, stoprev=0, inclusive=False):
779 """Generate the ancestors of 'revs' in reverse revision order.
779 """Generate the ancestors of 'revs' in reverse revision order.
780 Does not generate revs lower than stoprev.
780 Does not generate revs lower than stoprev.
781
781
782 See the documentation for ancestor.lazyancestors for more details."""
782 See the documentation for ancestor.lazyancestors for more details."""
783
783
784 # first, make sure start revisions aren't filtered
784 # first, make sure start revisions aren't filtered
785 revs = list(revs)
785 revs = list(revs)
786 checkrev = self.node
786 checkrev = self.node
787 for r in revs:
787 for r in revs:
788 checkrev(r)
788 checkrev(r)
789 # and we're sure ancestors aren't filtered as well
789 # and we're sure ancestors aren't filtered as well
790
790
791 if rustancestor is not None:
791 if rustancestor is not None:
792 lazyancestors = rustancestor.LazyAncestors
792 lazyancestors = rustancestor.LazyAncestors
793 arg = self.index
793 arg = self.index
794 elif util.safehasattr(parsers, 'rustlazyancestors'):
794 elif util.safehasattr(parsers, 'rustlazyancestors'):
795 lazyancestors = ancestor.rustlazyancestors
795 lazyancestors = ancestor.rustlazyancestors
796 arg = self.index
796 arg = self.index
797 else:
797 else:
798 lazyancestors = ancestor.lazyancestors
798 lazyancestors = ancestor.lazyancestors
799 arg = self._uncheckedparentrevs
799 arg = self._uncheckedparentrevs
800 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
800 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
801
801
802 def descendants(self, revs):
802 def descendants(self, revs):
803 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
803 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
804
804
805 def findcommonmissing(self, common=None, heads=None):
805 def findcommonmissing(self, common=None, heads=None):
806 """Return a tuple of the ancestors of common and the ancestors of heads
806 """Return a tuple of the ancestors of common and the ancestors of heads
807 that are not ancestors of common. In revset terminology, we return the
807 that are not ancestors of common. In revset terminology, we return the
808 tuple:
808 tuple:
809
809
810 ::common, (::heads) - (::common)
810 ::common, (::heads) - (::common)
811
811
812 The list is sorted by revision number, meaning it is
812 The list is sorted by revision number, meaning it is
813 topologically sorted.
813 topologically sorted.
814
814
815 'heads' and 'common' are both lists of node IDs. If heads is
815 'heads' and 'common' are both lists of node IDs. If heads is
816 not supplied, uses all of the revlog's heads. If common is not
816 not supplied, uses all of the revlog's heads. If common is not
817 supplied, uses nullid."""
817 supplied, uses nullid."""
818 if common is None:
818 if common is None:
819 common = [nullid]
819 common = [nullid]
820 if heads is None:
820 if heads is None:
821 heads = self.heads()
821 heads = self.heads()
822
822
823 common = [self.rev(n) for n in common]
823 common = [self.rev(n) for n in common]
824 heads = [self.rev(n) for n in heads]
824 heads = [self.rev(n) for n in heads]
825
825
826 # we want the ancestors, but inclusive
826 # we want the ancestors, but inclusive
827 class lazyset(object):
827 class lazyset(object):
828 def __init__(self, lazyvalues):
828 def __init__(self, lazyvalues):
829 self.addedvalues = set()
829 self.addedvalues = set()
830 self.lazyvalues = lazyvalues
830 self.lazyvalues = lazyvalues
831
831
832 def __contains__(self, value):
832 def __contains__(self, value):
833 return value in self.addedvalues or value in self.lazyvalues
833 return value in self.addedvalues or value in self.lazyvalues
834
834
835 def __iter__(self):
835 def __iter__(self):
836 added = self.addedvalues
836 added = self.addedvalues
837 for r in added:
837 for r in added:
838 yield r
838 yield r
839 for r in self.lazyvalues:
839 for r in self.lazyvalues:
840 if not r in added:
840 if not r in added:
841 yield r
841 yield r
842
842
843 def add(self, value):
843 def add(self, value):
844 self.addedvalues.add(value)
844 self.addedvalues.add(value)
845
845
846 def update(self, values):
846 def update(self, values):
847 self.addedvalues.update(values)
847 self.addedvalues.update(values)
848
848
849 has = lazyset(self.ancestors(common))
849 has = lazyset(self.ancestors(common))
850 has.add(nullrev)
850 has.add(nullrev)
851 has.update(common)
851 has.update(common)
852
852
853 # take all ancestors from heads that aren't in has
853 # take all ancestors from heads that aren't in has
854 missing = set()
854 missing = set()
855 visit = collections.deque(r for r in heads if r not in has)
855 visit = collections.deque(r for r in heads if r not in has)
856 while visit:
856 while visit:
857 r = visit.popleft()
857 r = visit.popleft()
858 if r in missing:
858 if r in missing:
859 continue
859 continue
860 else:
860 else:
861 missing.add(r)
861 missing.add(r)
862 for p in self.parentrevs(r):
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

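    # A minimal usage sketch for the two methods above, assuming `rl` is an
    # open revlog and `h`, `c` are node ids it contains (hypothetical names):
    #
    #   revs = rl.findmissingrevs(common=[rl.rev(c)], heads=[rl.rev(h)])
    #   nodes = rl.findmissing(common=[c], heads=[h])
    #
    # Both results are topologically sorted, and for the same inputs
    # `nodes` equals [rl.node(r) for r in revs].
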
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

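    # A sketch of the nodesbetween() contract, assuming `rl` is an open
    # revlog and `r0`, `h1` are node ids where h1 descends from r0
    # (hypothetical names):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([r0], [h1])
    #   # `nodes` lists the path from r0 toward h1 in topological order,
    #   # and since every node is its own ancestor and descendant,
    #   # r0 appears in outroots and h1 in outheads.
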
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

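    # A sketch, assuming `rl` is an open revlog and `n` one of its nodes
    # (hypothetical names): rl.heads() returns every childless node, while
    #
    #   rl.heads(start=n)
    #
    # restricts the result to heads that descend from `n`.
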
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

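    # children() is the inverse walk of parents(). A sketch, assuming `rl`
    # is an open revlog and `n` one of its nodes (hypothetical names):
    #
    #   for child in rl.children(n):
    #       assert n in rl.parents(child)
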
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

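    # The early returns above lean on a revlog invariant: a revision's
    # number is always greater than those of its ancestors. A sketch,
    # assuming `rl` is an open revlog with at least six revisions:
    #
    #   rl.isancestorrev(2, 2)  # True: a revision is its own ancestor
    #   rl.isancestorrev(5, 2)  # False with no graph traversal at all
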
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads,
                                             includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

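    # When several "best" common ancestors exist (e.g. after a criss-cross
    # merge), the min() above picks the smallest node id so the tie is
    # resolved the same way every time. A sketch, assuming `rl` is an open
    # revlog and `a`, `b` are node ids (hypothetical names):
    #
    #   anc = rl.ancestor(a, b)
    #   assert anc == nullid or anc in rl.commonancestorsheads(a, b)
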
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

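    # lookup() accepts several identifier forms. A sketch, assuming `rl`
    # is an open revlog containing at least one revision:
    #
    #   rl.lookup(0)                      # revision number
    #   rl.lookup(rl.node(0))             # full binary node id
    #   rl.lookup(hex(rl.node(0))[:8])    # unambiguous hex prefix
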
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

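    # A sketch tying shortest() back to lookup(), assuming `rl` is an open
    # revlog and `n` one of its nodes (hypothetical names):
    #
    #   prefix = rl.shortest(n, minlength=4)
    #   assert rl.lookup(prefix) == n  # the prefix is unambiguous
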
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

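    # The bit-masking above aligns reads to cache-size boundaries. A worked
    # example, assuming the default chunk cache size of 65536 (2**16):
    #
    #   offset, length = 70000, 1000
    #   realoffset = 70000 & ~65535                                # 65536
    #   reallength = ((70000 + 1000 + 65536) & ~65535) - 65536     # 65536
    #
    # so one aligned 64k read at offset 65536 covers the 1000 requested
    # bytes plus surrounding data that later calls can serve from cache.
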
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

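    # A sketch of the two delta layouts, assuming `rl` is an open revlog:
    #
    #   rl.deltaparent(r) == nullrev   # r is stored as a full snapshot
    #
    # With generaldelta the base may be any earlier revision (typically a
    # parent); without it, deltas always chain against rev - 1.
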
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

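    # In a sparse revlog, intermediate snapshots can themselves be stored
    # as deltas against earlier snapshots. A sketch of the invariant the
    # recursion above maintains, assuming `rl` is an open sparse revlog:
    #
    #   if rl.issnapshot(r) and rl.deltaparent(r) != nullrev:
    #       assert rl.issnapshot(rl.deltaparent(r))
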
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1),
                              self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = ('_processflags(...) is deprecated, use the specialized variant')
        util.nouideprecwarn(msg, '5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == 'read':
            return flagutil.processflagsread(self, text, flags)
        else: # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = ('revlog.revision(..., raw=True) is deprecated, '
                   'use revlog.rawdata(...)')
            util.nouideprecwarn(msg, '5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return "", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext # let us have a chance to free memory early
        return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

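    # storageutil.hashrevisionsha1 implements the classic Mercurial node id:
    # the SHA-1 of the two parent nodes (sorted) followed by the text. A
    # rough sketch of that scheme (hypothetical helper name; the real code
    # also optimizes the common no-parent case):
    #
    #   import hashlib
    #
    #   def sketchhash(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
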
1744 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1744 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1745 """Check node hash integrity.
1745 """Check node hash integrity.
1746
1746
1747 Available as a function so that subclasses can extend hash mismatch
1747 Available as a function so that subclasses can extend hash mismatch
1748 behaviors as needed.
1748 behaviors as needed.
1749 """
1749 """
1750 try:
1750 try:
1751 if p1 is None and p2 is None:
1751 if p1 is None and p2 is None:
1752 p1, p2 = self.parents(node)
1752 p1, p2 = self.parents(node)
1753 if node != self.hash(text, p1, p2):
1753 if node != self.hash(text, p1, p2):
1754 # Clear the revision cache on hash failure. The revision cache
1754 # Clear the revision cache on hash failure. The revision cache
1755 # only stores the raw revision and clearing the cache does have
1755 # only stores the raw revision and clearing the cache does have
1756 # the side-effect that we won't have a cache hit when the raw
1756 # the side-effect that we won't have a cache hit when the raw
1757 # revision data is accessed. But this case should be rare and
1757 # revision data is accessed. But this case should be rare and
1758 # it is extra work to teach the cache about the hash
1758 # it is extra work to teach the cache about the hash
1759 # verification state.
1759 # verification state.
1760 if self._revisioncache and self._revisioncache[0] == node:
1760 if self._revisioncache and self._revisioncache[0] == node:
1761 self._revisioncache = None
1761 self._revisioncache = None
1762
1762
1763 revornode = rev
1763 revornode = rev
1764 if revornode is None:
1764 if revornode is None:
1765 revornode = templatefilters.short(hex(node))
1765 revornode = templatefilters.short(hex(node))
1766 raise error.RevlogError(_("integrity check failed on %s:%s")
1766 raise error.RevlogError(_("integrity check failed on %s:%s")
1767 % (self.indexfile, pycompat.bytestr(revornode)))
1767 % (self.indexfile, pycompat.bytestr(revornode)))
1768 except error.RevlogError:
1768 except error.RevlogError:
1769 if self._censorable and storageutil.iscensoredtext(text):
1769 if self._censorable and storageutil.iscensoredtext(text):
1770 raise error.CensoredNodeError(self.indexfile, node, text)
1770 raise error.CensoredNodeError(self.indexfile, node, text)
1771 raise
1771 raise
1772
1772
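    # A minimal sketch, assuming a hypothetical subclass, of how hash() and
    # checkhash() are meant to be specialized together. The names
    # ``customhashrevlog`` and ``_customhash`` are illustrative only and do
    # not exist in Mercurial:
    #
    #   class customhashrevlog(revlog):
    #       def hash(self, text, p1, p2):
    #           # replaces the SHA-1 based parent/text mixing
    #           return _customhash(p1 + p2 + text)
    #
    # checkhash() calls self.hash(), so overriding hash() alone keeps the
    # integrity check consistent; override checkhash() only to change how
    # mismatches are reported.
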
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
                    sidedata=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if sidedata is None:
            sidedata = {}

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
                                                           sidedata=sidedata)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

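    # Illustrative sketch (not part of this module) of the (header, data)
    # contract of compress(): an empty header means the chunk already starts
    # with a recognizable engine header, while 'u' flags data that is stored
    # uncompressed:
    #
    #   header, chunk = rl.compress(b'some revision text')
    #   stored = header + chunk   # bytes that land in the revlog
    #   assert stored[0:1] in (b'x', b'u', b'\0')   # zlib-configured revlog,
    #                                               # Python 3 spelling
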
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

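    # A rough standalone illustration of the header dispatch above, assuming
    # plain zlib outside the engine registry: 'x' (0x78) happens to be the
    # first byte of a default zlib stream, which is why compressed chunks can
    # be recognized by their leading byte:
    #
    #   import zlib
    #   chunk = zlib.compress(b'revision text')
    #   assert chunk[0:1] == b'x'
    #   assert zlib.decompress(chunk) == b'revision text'
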
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size before it is changed by flag processors;
            # this is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

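    # Shape of the input addgroup() consumes, with illustrative names only:
    # every item of ``deltas`` is a 7-tuple, and ``linkmapper`` translates
    # the changelog node carried by the group into a local linkrev, e.g.:
    #
    #   deltas = iter([
    #       (node, p1, p2, linknode, deltabase, delta, flags),
    #   ])
    #   rl.addgroup(deltas, cl.rev, tr)   # cl/tr assumed from caller context
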
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

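    # Worked example for getstrippoint(), with made-up numbers: stripping
    # from minlink=10 in a revlog where rev 3 has linkrev 12 and rev 7 has
    # linkrev 9 returns (3, {7}) -- truncation starts at rev 3 (the first
    # rev whose linkrev is >= minlink), and rev 7 is reported as broken
    # because it is removed even though its linkrev (9) is below minlink.
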
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

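    # Reading checksize() results, as a hypothetical example: (0, 0) is a
    # healthy revlog; (4096, 0) would mean the data file ends with 4096
    # bytes the index never references (e.g. an interrupted write); a
    # nonzero di means the index itself ends in a partial entry:
    #
    #   dd, di = rl.checksize()
    #   if (dd, di) != (0, 0):
    #       pass  # revlog is damaged; see verifyintegrity()
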
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

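    # Hedged usage sketch for clone() below, recomputing every delta while
    # copying into an empty destination revlog (``src``, ``dest`` and ``tr``
    # are assumed to exist in the caller's context):
    #
    #   src.clone(tr, dest, deltareuse=revlog.DELTAREUSENEVER)
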
    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. By default (``None``), the destination revlog's
        current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(tr, destrevlog, addrevisioncb, deltareuse,
                        forcedeltabothparents)

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
               forcedeltabothparents):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xffff
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self.revision(rev)
                destrevlog.addrevision(text, tr, linkrev, p1, p2,
                                       cachedelta=cachedelta,
                                       node=node, flags=flags,
                                       deltacomputer=deltacomputer)
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                            p2, flags, cachedelta, ifh, dfh,
                                            deltacomputer=deltacomputer)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
2658 for path in self.files())
2658 for path in self.files())
2659
2659
2660 return d
2660 return d
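
Reviewer note: storageinfo() above computes each field only when its keyword is requested, since some of them are expensive (storedsize in particular stats every file on disk). A minimal usage sketch, assuming `rl` is any object implementing this interface (the names are illustrative, not part of the change):

def describe_storage(rl):
    # request only the cheap fields; storedsize would stat every file
    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    return '%d revisions, %d tracked bytes' % (info['revisionscount'],
                                               info['trackedsize'])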
@@ -1,230 +1,231 @@
1 # statichttprepo.py - simple http repository class for mercurial
2 #
3 # This provides read-only repo access to repositories exported via static http
4 #
5 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
6 #
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
9
10 from __future__ import absolute_import
11
12 import errno
13
14 from .i18n import _
15 from . import (
16     branchmap,
17     changelog,
18     error,
19     localrepo,
20     manifest,
21     namespaces,
22     pathutil,
23     pycompat,
24     url,
25     util,
26     vfs as vfsmod,
27 )
28
29 urlerr = util.urlerr
30 urlreq = util.urlreq
31
32 class httprangereader(object):
33     def __init__(self, url, opener):
34         # we assume opener has HTTPRangeHandler
35         self.url = url
36         self.pos = 0
37         self.opener = opener
38         self.name = url
39
40     def __enter__(self):
41         return self
42
43     def __exit__(self, exc_type, exc_value, traceback):
44         self.close()
45
46     def seek(self, pos):
47         self.pos = pos
48     def read(self, bytes=None):
49         req = urlreq.request(pycompat.strurl(self.url))
50         end = ''
51         if bytes:
52             end = self.pos + bytes - 1
53         if self.pos or end:
54             req.add_header(r'Range', r'bytes=%d-%s' % (self.pos, end))
55
56         try:
57             f = self.opener.open(req)
58             data = f.read()
59             code = f.code
60         except urlerr.httperror as inst:
61             num = inst.code == 404 and errno.ENOENT or None
62             raise IOError(num, inst)
63         except urlerr.urlerror as inst:
64             raise IOError(None, inst.reason)
65
66         if code == 200:
67             # HTTPRangeHandler does nothing if remote does not support
68             # Range headers and returns the full entity. Let's slice it.
69             if bytes:
70                 data = data[self.pos:self.pos + bytes]
71             else:
72                 data = data[self.pos:]
73         elif bytes:
74             data = data[:bytes]
75         self.pos += len(data)
76         return data
77     def readlines(self):
78         return self.read().splitlines(True)
79     def __iter__(self):
80         return iter(self.readlines())
81     def close(self):
82         pass
83
84 # _RangeError and _HTTPRangeHandler were originally in byterange.py,
85 # which was itself extracted from urlgrabber. See the last version of
86 # byterange.py from history if you need more information.
87 class _RangeError(IOError):
88     """Error raised when an unsatisfiable range is requested."""
89
90 class _HTTPRangeHandler(urlreq.basehandler):
91     """Handler that enables HTTP Range headers.
92
93     This was extremely simple. The Range header is an HTTP feature to
94     begin with so all this class does is tell urllib2 that the
95     "206 Partial Content" response from the HTTP server is what we
96     expected.
97     """
98
99     def http_error_206(self, req, fp, code, msg, hdrs):
100         # 206 Partial Content Response
101         r = urlreq.addinfourl(fp, hdrs, req.get_full_url())
102         r.code = code
103         r.msg = msg
104         return r
105
106     def http_error_416(self, req, fp, code, msg, hdrs):
107         # HTTP's Range Not Satisfiable error
108         raise _RangeError('Requested Range Not Satisfiable')
109
110 def build_opener(ui, authinfo):
111     # urllib cannot handle URLs with embedded user or passwd
112     urlopener = url.opener(ui, authinfo)
113     urlopener.add_handler(_HTTPRangeHandler())
114
115     class statichttpvfs(vfsmod.abstractvfs):
116         def __init__(self, base):
117             self.base = base
118             self.options = {}
119
120         def __call__(self, path, mode='r', *args, **kw):
121             if mode not in ('r', 'rb'):
122                 raise IOError('Permission denied')
123             f = "/".join((self.base, urlreq.quote(path)))
124             return httprangereader(f, urlopener)
125
126         def join(self, path):
127             if path:
128                 return pathutil.join(self.base, path)
129             else:
130                 return self.base
131
132     return statichttpvfs
133
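
Reviewer note: the `self.options = {}` added to statichttpvfs above is the point of this change. Consumers such as revlog read tuning knobs from `opener.options`, and giving every vfs an empty dict by default lets them use a plain dict lookup instead of guarding for a missing attribute. A standalone sketch of the pattern (the class, function name, and config key are illustrative, not Mercurial's actual code):

class fakevfs(object):
    def __init__(self):
        # mirrors the default added in this change
        self.options = {}

def chunkcachesize(opener):
    # no getattr(opener, 'options', {}) dance needed any more
    return opener.options.get('chunkcachesize', 65536)

assert chunkcachesize(fakevfs()) == 65536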
134 class statichttppeer(localrepo.localpeer):
135     def local(self):
136         return None
137     def canpush(self):
138         return False
139
140 class statichttprepository(localrepo.localrepository,
141                            localrepo.revlogfilestorage):
142     supported = localrepo.localrepository._basesupported
143
144     def __init__(self, ui, path):
145         self._url = path
146         self.ui = ui
147
148         self.root = path
149         u = util.url(path.rstrip('/') + "/.hg")
150         self.path, authinfo = u.authinfo()
151
152         vfsclass = build_opener(ui, authinfo)
153         self.vfs = vfsclass(self.path)
154         self.cachevfs = vfsclass(self.vfs.join('cache'))
155         self._phasedefaults = []
156
157         self.names = namespaces.namespaces()
158         self.filtername = None
159         self._extrafilterid = None
160
161         try:
162             requirements = set(self.vfs.read(b'requires').splitlines())
163         except IOError as inst:
164             if inst.errno != errno.ENOENT:
165                 raise
166             requirements = set()
167
168             # check if it is a non-empty old-style repository
169             try:
170                 fp = self.vfs("00changelog.i")
171                 fp.read(1)
172                 fp.close()
173             except IOError as inst:
174                 if inst.errno != errno.ENOENT:
175                     raise
176                 # we do not care about empty old-style repositories here
177                 msg = _("'%s' does not appear to be an hg repository") % path
178                 raise error.RepoError(msg)
179
180         supportedrequirements = localrepo.gathersupportedrequirements(ui)
181         localrepo.ensurerequirementsrecognized(requirements,
182                                                supportedrequirements)
183         localrepo.ensurerequirementscompatible(ui, requirements)
184
185         # setup store
186         self.store = localrepo.makestore(requirements, self.path, vfsclass)
187         self.spath = self.store.path
188         self.svfs = self.store.opener
189         self.sjoin = self.store.join
190         self._filecache = {}
191         self.requirements = requirements
192
193         rootmanifest = manifest.manifestrevlog(self.svfs)
194         self.manifestlog = manifest.manifestlog(self.svfs, self, rootmanifest,
195                                                 self.narrowmatch())
196         self.changelog = changelog.changelog(self.svfs)
197         self._tags = None
198         self.nodetagscache = None
199         self._branchcaches = branchmap.BranchMapCache()
200         self._revbranchcache = None
201         self.encodepats = None
202         self.decodepats = None
203         self._transref = None
204
205     def _restrictcapabilities(self, caps):
206         caps = super(statichttprepository, self)._restrictcapabilities(caps)
207         return caps.difference(["pushkey"])
208
209     def url(self):
210         return self._url
211
212     def local(self):
213         return False
214
215     def peer(self):
216         return statichttppeer(self)
217
218     def wlock(self, wait=True):
219         raise error.LockUnavailable(0, _('lock not available'), 'lock',
220                                     _('cannot lock static-http repository'))
221
222     def lock(self, wait=True):
223         raise error.Abort(_('cannot lock static-http repository'))
224
225     def _writecaches(self):
226         pass # statichttprepository is read-only
227
228 def instance(ui, path, create, intents=None, createopts=None):
229     if create:
230         raise error.Abort(_('cannot create new static-http repository'))
231     return statichttprepository(ui, path[7:])
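
Reviewer note: as a sanity check of the Range arithmetic in httprangereader.read() above, a read of `bytes` octets at offset `pos` requests the inclusive range pos..pos+bytes-1, and omits the header entirely for a full read from the start. A standalone sketch under those assumptions (the function name is made up):

def rangeheader(pos, nbytes=None):
    end = '' if nbytes is None else pos + nbytes - 1
    if pos or end != '':
        return 'bytes=%d-%s' % (pos, end)
    return None  # whole entity, no Range header needed

assert rangeheader(0, 10) == 'bytes=0-9'   # first ten bytes
assert rangeheader(100) == 'bytes=100-'    # everything from offset 100
assert rangeheader(0) is None              # full read, no header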
@@ -1,674 +1,675 @@
1 # vfs.py - Mercurial 'vfs' classes
2 #
3 # Copyright Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
8
9 import contextlib
10 import errno
11 import os
12 import shutil
13 import stat
14 import threading
15
16 from .i18n import _
17 from . import (
18     encoding,
19     error,
20     pathutil,
21     pycompat,
22     util,
23 )
24
25 def _avoidambig(path, oldstat):
26     """Avoid file stat ambiguity forcibly
27
28     This function copies the ``path`` file if it is owned by
29     another (see issue5418 and issue5584 for details).
30     """
31     def checkandavoid():
32         newstat = util.filestat.frompath(path)
33         # return whether file stat ambiguity is (already) avoided
34         return (not newstat.isambig(oldstat) or
35                 newstat.avoidambig(path, oldstat))
36     if not checkandavoid():
37         # simply copy to change owner of path to get privilege to
38         # advance mtime (see issue5418)
39         util.rename(util.mktempcopy(path), path)
40         checkandavoid()
41
42 class abstractvfs(object):
43     """Abstract base class; cannot be instantiated"""
44
45     def __init__(self, *args, **kwargs):
46         '''Prevent instantiation; don't call this from subclasses.'''
47         raise NotImplementedError('attempted instantiating ' + str(type(self)))
48
49     def _auditpath(self, path, mode):
50         raise NotImplementedError
51
52     def tryread(self, path):
53         '''gracefully return an empty string for missing files'''
54         try:
55             return self.read(path)
56         except IOError as inst:
57             if inst.errno != errno.ENOENT:
58                 raise
59         return ""
60
61     def tryreadlines(self, path, mode='rb'):
62         '''gracefully return an empty array for missing files'''
63         try:
64             return self.readlines(path, mode=mode)
65         except IOError as inst:
66             if inst.errno != errno.ENOENT:
67                 raise
68         return []
69
70     @util.propertycache
71     def open(self):
72         '''Open ``path`` file, which is relative to vfs root.
73
74         Newly created directories are marked as "not to be indexed by
75         the content indexing service", if ``notindexed`` is specified
76         for "write" mode access.
77         '''
78         return self.__call__
79
80     def read(self, path):
81         with self(path, 'rb') as fp:
82             return fp.read()
83
84     def readlines(self, path, mode='rb'):
85         with self(path, mode=mode) as fp:
86             return fp.readlines()
87
88     def write(self, path, data, backgroundclose=False, **kwargs):
89         with self(path, 'wb', backgroundclose=backgroundclose, **kwargs) as fp:
90             return fp.write(data)
91
92     def writelines(self, path, data, mode='wb', notindexed=False):
93         with self(path, mode=mode, notindexed=notindexed) as fp:
94             return fp.writelines(data)
95
96     def append(self, path, data):
97         with self(path, 'ab') as fp:
98             return fp.write(data)
99
100     def basename(self, path):
101         """return base element of a path (as os.path.basename would do)
102
103         This exists to allow handling of strange encoding if needed."""
104         return os.path.basename(path)
105
106     def chmod(self, path, mode):
107         return os.chmod(self.join(path), mode)
108
109     def dirname(self, path):
110         """return dirname element of a path (as os.path.dirname would do)
111
112         This exists to allow handling of strange encoding if needed."""
113         return os.path.dirname(path)
114
115     def exists(self, path=None):
116         return os.path.exists(self.join(path))
117
118     def fstat(self, fp):
119         return util.fstat(fp)
120
121     def isdir(self, path=None):
122         return os.path.isdir(self.join(path))
123
124     def isfile(self, path=None):
125         return os.path.isfile(self.join(path))
126
127     def islink(self, path=None):
128         return os.path.islink(self.join(path))
129
130     def isfileorlink(self, path=None):
131         '''return whether path is a regular file or a symlink
132
133         Unlike isfile, this doesn't follow symlinks.'''
134         try:
135             st = self.lstat(path)
136         except OSError:
137             return False
138         mode = st.st_mode
139         return stat.S_ISREG(mode) or stat.S_ISLNK(mode)
140
141     def reljoin(self, *paths):
142         """join various elements of a path together (as os.path.join would do)
143
144         The vfs base is not injected so that paths stay relative. This exists
145         to allow handling of strange encoding if needed."""
146         return os.path.join(*paths)
147
148     def split(self, path):
149         """split top-most element of a path (as os.path.split would do)
150
151         This exists to allow handling of strange encoding if needed."""
152         return os.path.split(path)
153
154     def lexists(self, path=None):
155         return os.path.lexists(self.join(path))
156
157     def lstat(self, path=None):
158         return os.lstat(self.join(path))
159
160     def listdir(self, path=None):
161         return os.listdir(self.join(path))
162
163     def makedir(self, path=None, notindexed=True):
164         return util.makedir(self.join(path), notindexed)
165
166     def makedirs(self, path=None, mode=None):
167         return util.makedirs(self.join(path), mode)
168
169     def makelock(self, info, path):
170         return util.makelock(info, self.join(path))
171
172     def mkdir(self, path=None):
173         return os.mkdir(self.join(path))
174
175     def mkstemp(self, suffix='', prefix='tmp', dir=None):
176         fd, name = pycompat.mkstemp(suffix=suffix, prefix=prefix,
177                                     dir=self.join(dir))
178         dname, fname = util.split(name)
179         if dir:
180             return fd, os.path.join(dir, fname)
181         else:
182             return fd, fname
183
184     def readdir(self, path=None, stat=None, skip=None):
185         return util.listdir(self.join(path), stat, skip)
186
187     def readlock(self, path):
188         return util.readlock(self.join(path))
189
190     def rename(self, src, dst, checkambig=False):
191         """Rename from src to dst
192
193         checkambig argument is used with util.filestat, and is useful
194         only if destination file is guarded by any lock
195         (e.g. repo.lock or repo.wlock).
196
197         To avoid file stat ambiguity forcibly, checkambig=True involves
198         copying ``src`` file, if it is owned by another. Therefore, use
199         checkambig=True only in limited cases (see also issue5418 and
200         issue5584 for detail).
201         """
202         self._auditpath(dst, 'w')
203         srcpath = self.join(src)
204         dstpath = self.join(dst)
205         oldstat = checkambig and util.filestat.frompath(dstpath)
206         if oldstat and oldstat.stat:
207             ret = util.rename(srcpath, dstpath)
208             _avoidambig(dstpath, oldstat)
209             return ret
210         return util.rename(srcpath, dstpath)
211
212     def readlink(self, path):
213         return util.readlink(self.join(path))
214
215     def removedirs(self, path=None):
216         """Remove a leaf directory and all empty intermediate ones
217         """
218         return util.removedirs(self.join(path))
219
220     def rmdir(self, path=None):
221         """Remove an empty directory."""
222         return os.rmdir(self.join(path))
223
224     def rmtree(self, path=None, ignore_errors=False, forcibly=False):
225         """Remove a directory tree recursively
226
227         If ``forcibly``, this tries to remove READ-ONLY files, too.
228         """
229         if forcibly:
230             def onerror(function, path, excinfo):
231                 if function is not os.remove:
232                     raise
233                 # read-only files cannot be unlinked under Windows
234                 s = os.stat(path)
235                 if (s.st_mode & stat.S_IWRITE) != 0:
236                     raise
237                 os.chmod(path, stat.S_IMODE(s.st_mode) | stat.S_IWRITE)
238                 os.remove(path)
239         else:
240             onerror = None
241         return shutil.rmtree(self.join(path),
242                              ignore_errors=ignore_errors, onerror=onerror)
243
244     def setflags(self, path, l, x):
245         return util.setflags(self.join(path), l, x)
246
247     def stat(self, path=None):
248         return os.stat(self.join(path))
249
250     def unlink(self, path=None):
251         return util.unlink(self.join(path))
252
253     def tryunlink(self, path=None):
254         """Attempt to remove a file, ignoring missing file errors."""
255         util.tryunlink(self.join(path))
256
257     def unlinkpath(self, path=None, ignoremissing=False, rmdir=True):
258         return util.unlinkpath(self.join(path), ignoremissing=ignoremissing,
259                                rmdir=rmdir)
260
261     def utime(self, path=None, t=None):
262         return os.utime(self.join(path), t)
263
264     def walk(self, path=None, onerror=None):
265         """Yield a (dirpath, dirs, files) tuple for each directory under path
266
267         ``dirpath`` is relative to the root of this vfs. This
268         uses ``os.sep`` as the path separator, even if you specify a
269         POSIX-style ``path``.
270
271         "The root of this vfs" is represented as empty ``dirpath``.
272         """
273         root = os.path.normpath(self.join(None))
274         # when dirpath == root, dirpath[prefixlen:] becomes empty
275         # because len(dirpath) < prefixlen.
276         prefixlen = len(pathutil.normasprefix(root))
277         for dirpath, dirs, files in os.walk(self.join(path), onerror=onerror):
278             yield (dirpath[prefixlen:], dirs, files)
279
280     @contextlib.contextmanager
281     def backgroundclosing(self, ui, expectedcount=-1):
282         """Allow files to be closed asynchronously.
283
284         When this context manager is active, ``backgroundclose`` can be passed
285         to ``__call__``/``open`` to result in the file possibly being closed
286         asynchronously, on a background thread.
287         """
288         # Sharing backgroundfilecloser between threads is complex, and using
289         # multiple instances puts us at risk of running out of file descriptors,
290         # so only allow use of backgroundfilecloser in the main thread.
291         if not isinstance(threading.currentThread(), threading._MainThread):
292             yield
293             return
294         vfs = getattr(self, 'vfs', self)
295         if getattr(vfs, '_backgroundfilecloser', None):
296             raise error.Abort(
297                 _('can only have 1 active background file closer'))
298
299         with backgroundfilecloser(ui, expectedcount=expectedcount) as bfc:
300             try:
301                 vfs._backgroundfilecloser = bfc
302                 yield bfc
303             finally:
304                 vfs._backgroundfilecloser = None
305
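
Reviewer note: backgroundclosing() above pairs with the `backgroundclose` keyword of `__call__`. A hedged usage sketch of the documented API (the `ui`, `destvfs`, and `files` names are assumptions for illustration):

def writeall(ui, destvfs, files):
    # files: a list of (name, data) pairs to write under destvfs
    with destvfs.backgroundclosing(ui, expectedcount=len(files)):
        for name, data in files:
            # each file is opened exactly once while the closer is active
            with destvfs(name, 'wb', backgroundclose=True) as fp:
                fp.write(data)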
306 class vfs(abstractvfs):
307     '''Operate files relative to a base directory
308
309     This class is used to hide the details of COW semantics and
310     remote file access from higher level code.
311
312     'cacheaudited' should be enabled only if (a) vfs object is short-lived, or
313     (b) the base directory is managed by hg and considered sort-of append-only.
314     See pathutil.pathauditor() for details.
315     '''
316     def __init__(self, base, audit=True, cacheaudited=False, expandpath=False,
317                  realpath=False):
318         if expandpath:
319             base = util.expandpath(base)
320         if realpath:
321             base = os.path.realpath(base)
322         self.base = base
323         self._audit = audit
324         if audit:
325             self.audit = pathutil.pathauditor(self.base, cached=cacheaudited)
326         else:
327             self.audit = (lambda path, mode=None: True)
328         self.createmode = None
329         self._trustnlink = None
330         self.options = {}
331
332     @util.propertycache
333     def _cansymlink(self):
334         return util.checklink(self.base)
335
336     @util.propertycache
337     def _chmod(self):
338         return util.checkexec(self.base)
339
340     def _fixfilemode(self, name):
341         if self.createmode is None or not self._chmod:
342             return
343         os.chmod(name, self.createmode & 0o666)
344
345     def _auditpath(self, path, mode):
346         if self._audit:
347             if os.path.isabs(path) and path.startswith(self.base):
348                 path = os.path.relpath(path, self.base)
349             r = util.checkosfilename(path)
350             if r:
351                 raise error.Abort("%s: %r" % (r, path))
352             self.audit(path, mode=mode)
353
354     def __call__(self, path, mode="r", atomictemp=False, notindexed=False,
355                  backgroundclose=False, checkambig=False, auditpath=True,
356                  makeparentdirs=True):
357         '''Open ``path`` file, which is relative to vfs root.
358
359         By default, parent directories are created as needed. Newly created
360         directories are marked as "not to be indexed by the content indexing
361         service", if ``notindexed`` is specified for "write" mode access.
362         Set ``makeparentdirs=False`` to not create directories implicitly.
363
364         If ``backgroundclose`` is passed, the file may be closed asynchronously.
365         It can only be used if the ``self.backgroundclosing()`` context manager
366         is active. This should only be specified if the following criteria hold:
367
368         1. There is a potential for writing thousands of files. Unless you
369            are writing thousands of files, the performance benefits of
370            asynchronously closing files are not realized.
371         2. Files are opened exactly once for the ``backgroundclosing``
372            active duration and are therefore free of race conditions between
373            closing a file on a background thread and reopening it. (If the
374            file were opened multiple times, there could be unflushed data
375            because the original file handle hasn't been flushed/closed yet.)
376
377         ``checkambig`` argument is passed to atomictempfile (valid
378         only for writing), and is useful only if target file is
379         guarded by any lock (e.g. repo.lock or repo.wlock).
380
381         To avoid file stat ambiguity forcibly, checkambig=True involves
382         copying ``path`` file opened in "append" mode (e.g. for
383         truncation), if it is owned by another. Therefore, use
384         combination of append mode and checkambig=True only in limited
385         cases (see also issue5418 and issue5584 for detail).
386         '''
387         if auditpath:
388             self._auditpath(path, mode)
389         f = self.join(path)
390
391         if "b" not in mode:
392             mode += "b" # for that other OS
393
394         nlink = -1
395         if mode not in ('r', 'rb'):
396             dirname, basename = util.split(f)
397             # If basename is empty, then the path is malformed because it points
398             # to a directory. Let the posixfile() call below raise IOError.
399             if basename:
400                 if atomictemp:
401                     if makeparentdirs:
402                         util.makedirs(dirname, self.createmode, notindexed)
403                     return util.atomictempfile(f, mode, self.createmode,
404                                                checkambig=checkambig)
405                 try:
406                     if 'w' in mode:
407                         util.unlink(f)
408                         nlink = 0
409                     else:
410                         # nlinks() may behave differently for files on Windows
411                         # shares if the file is open.
412                         with util.posixfile(f):
413                             nlink = util.nlinks(f)
414                             if nlink < 1:
415                                 nlink = 2 # force mktempcopy (issue1922)
416                 except (OSError, IOError) as e:
417                     if e.errno != errno.ENOENT:
418                         raise
419                     nlink = 0
420                 if makeparentdirs:
421                     util.makedirs(dirname, self.createmode, notindexed)
422                 if nlink > 0:
423                     if self._trustnlink is None:
424                         self._trustnlink = nlink > 1 or util.checknlink(f)
425                     if nlink > 1 or not self._trustnlink:
426                         util.rename(util.mktempcopy(f), f)
427         fp = util.posixfile(f, mode)
428         if nlink == 0:
429             self._fixfilemode(f)
430
431         if checkambig:
432             if mode in ('r', 'rb'):
433                 raise error.Abort(_('implementation error: mode %s is not'
434                                     ' valid for checkambig=True') % mode)
435             fp = checkambigatclosing(fp)
436
437         if (backgroundclose and
438                 isinstance(threading.currentThread(), threading._MainThread)):
439             if not self._backgroundfilecloser:
440                 raise error.Abort(_('backgroundclose can only be used when a '
441                                     'backgroundclosing context manager is active')
442                                   )
443
444             fp = delayclosedfile(fp, self._backgroundfilecloser)
445
446         return fp
447
448     def symlink(self, src, dst):
449         self.audit(dst)
450         linkname = self.join(dst)
451         util.tryunlink(linkname)
452
453         util.makedirs(os.path.dirname(linkname), self.createmode)
454
455         if self._cansymlink:
456             try:
457                 os.symlink(src, linkname)
458             except OSError as err:
459                 raise OSError(err.errno, _('could not symlink to %r: %s') %
460                               (src, encoding.strtolocal(err.strerror)),
461                               linkname)
462         else:
463             self.write(dst, src)
464
465     def join(self, path, *insidef):
466         if path:
467             return os.path.join(self.base, path, *insidef)
468         else:
469             return self.base
470
471 opener = vfs
472
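
Reviewer note: a hedged usage sketch of the `__call__` flags documented above (the base directory and file contents are made up, and runnability assumes a Mercurial checkout on the import path): an `atomictemp` write goes to a temporary file that is renamed into place on close, so readers never observe a partial file, and after this change a fresh vfs always starts with an empty options dict.

from mercurial import vfs as vfsmod

v = vfsmod.vfs(b'/tmp/sketch-repo')   # hypothetical base directory
v.makedirs()                          # ensure the base exists
with v(b'requires', 'wb', atomictemp=True) as fp:
    fp.write(b'revlogv1\n')           # renamed into place on close
assert v.read(b'requires') == b'revlogv1\n'
assert v.options == {}                # the default added in this change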
472 class proxyvfs(abstractvfs):
473 class proxyvfs(abstractvfs):
473 def __init__(self, vfs):
474 def __init__(self, vfs):
474 self.vfs = vfs
475 self.vfs = vfs
475
476
476 def _auditpath(self, path, mode):
477 def _auditpath(self, path, mode):
477 return self.vfs._auditpath(path, mode)
478 return self.vfs._auditpath(path, mode)
478
479
479 @property
480 @property
480 def options(self):
481 def options(self):
481 return self.vfs.options
482 return self.vfs.options
482
483
483 @options.setter
484 @options.setter
484 def options(self, value):
485 def options(self, value):
485 self.vfs.options = value
486 self.vfs.options = value
486
487
487 class filtervfs(proxyvfs, abstractvfs):
488 class filtervfs(proxyvfs, abstractvfs):
488 '''Wrapper vfs for filtering filenames with a function.'''
489 '''Wrapper vfs for filtering filenames with a function.'''
489
490
490 def __init__(self, vfs, filter):
491 def __init__(self, vfs, filter):
491 proxyvfs.__init__(self, vfs)
492 proxyvfs.__init__(self, vfs)
492 self._filter = filter
493 self._filter = filter
493
494
494 def __call__(self, path, *args, **kwargs):
495 def __call__(self, path, *args, **kwargs):
495 return self.vfs(self._filter(path), *args, **kwargs)
496 return self.vfs(self._filter(path), *args, **kwargs)
496
497
497 def join(self, path, *insidef):
498 def join(self, path, *insidef):
498 if path:
499 if path:
499 return self.vfs.join(self._filter(self.vfs.reljoin(path, *insidef)))
500 return self.vfs.join(self._filter(self.vfs.reljoin(path, *insidef)))
500 else:
501 else:
501 return self.vfs.join(path)
502 return self.vfs.join(path)
502
503
503 filteropener = filtervfs
504 filteropener = filtervfs
504
505
505 class readonlyvfs(proxyvfs):
506 class readonlyvfs(proxyvfs):
506 '''Wrapper vfs preventing any writing.'''
507 '''Wrapper vfs preventing any writing.'''
507
508
508 def __init__(self, vfs):
509 def __init__(self, vfs):
509 proxyvfs.__init__(self, vfs)
510 proxyvfs.__init__(self, vfs)
510
511
511 def __call__(self, path, mode='r', *args, **kw):
512 def __call__(self, path, mode='r', *args, **kw):
512 if mode not in ('r', 'rb'):
513 if mode not in ('r', 'rb'):
513 raise error.Abort(_('this vfs is read only'))
514 raise error.Abort(_('this vfs is read only'))
514 return self.vfs(path, mode, *args, **kw)
515 return self.vfs(path, mode, *args, **kw)
515
516
516 def join(self, path, *insidef):
517 def join(self, path, *insidef):
517 return self.vfs.join(path, *insidef)
518 return self.vfs.join(path, *insidef)
518
519
519 class closewrapbase(object):
520 class closewrapbase(object):
520 """Base class of wrapper, which hooks closing
521 """Base class of wrapper, which hooks closing
521
522
522 Do not instantiate outside of the vfs layer.
523 Do not instantiate outside of the vfs layer.
523 """
524 """
524 def __init__(self, fh):
525 def __init__(self, fh):
525 object.__setattr__(self, r'_origfh', fh)
526 object.__setattr__(self, r'_origfh', fh)
526
527
527 def __getattr__(self, attr):
528 def __getattr__(self, attr):
528 return getattr(self._origfh, attr)
529 return getattr(self._origfh, attr)
529
530
530 def __setattr__(self, attr, value):
531 def __setattr__(self, attr, value):
531 return setattr(self._origfh, attr, value)
532 return setattr(self._origfh, attr, value)
532
533
533 def __delattr__(self, attr):
534 def __delattr__(self, attr):
534 return delattr(self._origfh, attr)
535 return delattr(self._origfh, attr)
535
536
536 def __enter__(self):
537 def __enter__(self):
537 self._origfh.__enter__()
538 self._origfh.__enter__()
538 return self
539 return self
539
540
540 def __exit__(self, exc_type, exc_value, exc_tb):
541 def __exit__(self, exc_type, exc_value, exc_tb):
541 raise NotImplementedError('attempted instantiating ' + str(type(self)))
542 raise NotImplementedError('attempted instantiating ' + str(type(self)))
542
543
543 def close(self):
544 def close(self):
544 raise NotImplementedError('attempted instantiating ' + str(type(self)))
545 raise NotImplementedError('attempted instantiating ' + str(type(self)))
545
546
546 class delayclosedfile(closewrapbase):
547 class delayclosedfile(closewrapbase):
547 """Proxy for a file object whose close is delayed.
548 """Proxy for a file object whose close is delayed.
548
549
549 Do not instantiate outside of the vfs layer.
550 Do not instantiate outside of the vfs layer.
550 """
551 """
551 def __init__(self, fh, closer):
552 def __init__(self, fh, closer):
552 super(delayclosedfile, self).__init__(fh)
553 super(delayclosedfile, self).__init__(fh)
553 object.__setattr__(self, r'_closer', closer)
554 object.__setattr__(self, r'_closer', closer)
554
555
555 def __exit__(self, exc_type, exc_value, exc_tb):
556 def __exit__(self, exc_type, exc_value, exc_tb):
556 self._closer.close(self._origfh)
557 self._closer.close(self._origfh)
557
558
558 def close(self):
559 def close(self):
559 self._closer.close(self._origfh)
560 self._closer.close(self._origfh)
560
561
561 class backgroundfilecloser(object):
562 class backgroundfilecloser(object):
562 """Coordinates background closing of file handles on multiple threads."""
563 """Coordinates background closing of file handles on multiple threads."""
563 def __init__(self, ui, expectedcount=-1):
564 def __init__(self, ui, expectedcount=-1):
564 self._running = False
565 self._running = False
565 self._entered = False
566 self._entered = False
566 self._threads = []
567 self._threads = []
567 self._threadexception = None
568 self._threadexception = None
568
569
569 # Only Windows/NTFS has slow file closing. So only enable by default
570 # Only Windows/NTFS has slow file closing. So only enable by default
570 # on that platform. But allow to be enabled elsewhere for testing.
571 # on that platform. But allow to be enabled elsewhere for testing.
571 defaultenabled = pycompat.iswindows
572 defaultenabled = pycompat.iswindows
572 enabled = ui.configbool('worker', 'backgroundclose', defaultenabled)
573 enabled = ui.configbool('worker', 'backgroundclose', defaultenabled)
573
574
574 if not enabled:
        if not enabled:
            return

        # There is overhead to starting and stopping the background threads.
        # Don't do background processing unless the file count is large enough
        # to justify it.
        minfilecount = ui.configint('worker', 'backgroundcloseminfilecount')
        # FUTURE dynamically start background threads after minfilecount closes.
        # (We don't currently have any callers that don't know their file count)
        if expectedcount > 0 and expectedcount < minfilecount:
            return

        maxqueue = ui.configint('worker', 'backgroundclosemaxqueue')
        threadcount = ui.configint('worker', 'backgroundclosethreadcount')

        ui.debug('starting %d threads for background file closing\n' %
                 threadcount)

        self._queue = pycompat.queue.Queue(maxsize=maxqueue)
        self._running = True

        for i in range(threadcount):
            t = threading.Thread(target=self._worker, name='backgroundcloser')
            self._threads.append(t)
            t.start()

    def __enter__(self):
        self._entered = True
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self._running = False

        # Wait for threads to finish closing so open files don't linger for
        # longer than the lifetime of the context manager.
        for t in self._threads:
            t.join()

    def _worker(self):
        """Main routine for worker thread."""
        while True:
            try:
                fh = self._queue.get(block=True, timeout=0.100)
                # Need to catch errors here, or the thread will terminate
                # and we could orphan file descriptors.
                try:
                    fh.close()
                except Exception as e:
                    # Stash the exception so we can re-raise it from the
                    # main thread later.
                    self._threadexception = e
            except pycompat.queue.Empty:
                if not self._running:
                    break

    def close(self, fh):
        """Schedule a file for closing."""
        if not self._entered:
            raise error.Abort(_('can only call close() when context manager '
                                'active'))

        # If a background thread encountered an exception, raise it now so we
        # fail fast. Otherwise we might go on for minutes before the error is
        # acted on.
        if self._threadexception:
            e = self._threadexception
            self._threadexception = None
            raise e

        # If we're not actively running, close synchronously.
        if not self._running:
            fh.close()
            return

        self._queue.put(fh, block=True, timeout=None)

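# A minimal usage sketch of the class above (an illustration only, not part
# of vfs.py), assuming `files` is an iterable of names, `contents` maps each
# name to bytes, and `opener` returns writable file objects:
#
#     with backgroundfilecloser(ui, expectedcount=len(files)) as bfc:
#         for name in files:
#             fh = opener(name)
#             fh.write(contents[name])
#             bfc.close(fh)
#
# close() merely queues the handle; the worker threads drain the queue, and
# __exit__ joins them so that no file object outlives the with block.
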
class checkambigatclosing(closewrapbase):
    """Proxy for a file object, to avoid ambiguity of file stat

    See also util.filestat for detail about "ambiguity of file stat".

    This proxy is useful only if the target file is guarded by any
    lock (e.g. repo.lock or repo.wlock)

    Do not instantiate outside of the vfs layer.
    """
    def __init__(self, fh):
        super(checkambigatclosing, self).__init__(fh)
        object.__setattr__(self, r'_oldstat', util.filestat.frompath(fh.name))

    def _checkambig(self):
        oldstat = self._oldstat
        if oldstat.stat:
            _avoidambig(self._origfh.name, oldstat)

    def __exit__(self, exc_type, exc_value, exc_tb):
        self._origfh.__exit__(exc_type, exc_value, exc_tb)
        self._checkambig()

    def close(self):
        self._origfh.close()
        self._checkambig()
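
For readers unfamiliar with the "ambiguity of file stat" the docstring refers
to: when a file is rewritten with same-sized content within the same second,
a cache validator keyed on (size, mtime) cannot tell that anything changed.
A self-contained illustration in plain Python (a sketch, not Mercurial API;
the final utime call mirrors, in spirit, the fixup _checkambig arranges at
close time):

  import os

  def looks_unchanged(old, new):
      # A (size, whole-second mtime) key cannot distinguish the two writes
      # below, even though the contents differ.
      return (old.st_size == new.st_size
              and int(old.st_mtime) == int(new.st_mtime))

  with open('f', 'wb') as fh:
      fh.write(b'aaaa')
  old = os.stat('f')
  with open('f', 'wb') as fh:
      fh.write(b'bbbb')  # same size, typically within the same second
  new = os.stat('f')

  if looks_unchanged(old, new):
      # Advance mtime by one second so the (size, mtime) key changes.
      os.utime('f', (new.st_atime, new.st_mtime + 1))
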
@@ -1,222 +1,223 b''
revlog.parseindex must be able to parse the index file even if
an index entry is split between two 64k blocks. The ideal test
would be to create an index file with inline data where
64k < size < 64k + 64 (64k is the size of the read buffer, 64 is
the size of an index entry) and with an index entry starting right
before the 64k block boundary, and try to read it.
We approximate that by reducing the read buffer to 1 byte.

  $ hg init a
  $ cd a
  $ echo abc > foo
  $ hg add foo
  $ hg commit -m 'add foo'
  $ echo >> foo
  $ hg commit -m 'change foo'
  $ hg log -r 0:
  changeset:   0:7c31755bf9b5
  user:        test
  date:        Thu Jan 01 00:00:00 1970 +0000
  summary:     add foo
  
  changeset:   1:26333235a41c
  tag:         tip
  user:        test
  date:        Thu Jan 01 00:00:00 1970 +0000
  summary:     change foo
  
  $ cat >> test.py << EOF
  > from __future__ import print_function
  > from mercurial import changelog, node, pycompat, vfs
  >
  > class singlebyteread(object):
  >     def __init__(self, real):
  >         self.real = real
  >
  >     def read(self, size=-1):
  >         if size == 65536:
  >             size = 1
  >         return self.real.read(size)
  >
  >     def __getattr__(self, key):
  >         return getattr(self.real, key)
  >
  >     def __enter__(self):
  >         self.real.__enter__()
  >         return self
  >
  >     def __exit__(self, *args, **kwargs):
  >         return self.real.__exit__(*args, **kwargs)
  >
  > def opener(*args):
  >     o = vfs.vfs(*args)
  >     def wrapper(*a, **kwargs):
  >         f = o(*a, **kwargs)
  >         return singlebyteread(f)
  >     wrapper.options = o.options
  >     return wrapper
  >
  > cl = changelog.changelog(opener(b'.hg/store'))
  > print(len(cl), 'revisions:')
  > for r in cl:
  >     print(pycompat.sysstr(node.short(cl.node(r))))
  > EOF
  $ "$PYTHON" test.py
  2 revisions:
  7c31755bf9b5
  26333235a41c

  $ cd ..

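The wrapper.options = o.options line in the opener above is the crux of this
change: revlog reads its configuration from an options dictionary on whatever
opener it is given, so a plain callable standing in for a vfs must expose one
as well. A minimal sketch of the required shape (an empty dict suffices when
no revlog options are in play):

  def opener(path, mode='rb'):
      return open(path, mode)
  # revlog consults opener.options for feature flags; it must at least exist.
  opener.options = {}
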
#if no-pure

Test SEGV caused by bad revision passed to reachableroots() (issue4775):

  $ cd a

  $ "$PYTHON" <<EOF
  > from __future__ import print_function
  > from mercurial import changelog, vfs
  > cl = changelog.changelog(vfs.vfs(b'.hg/store'))
  > print('good heads:')
  > for head in [0, len(cl) - 1, -1]:
  >     print('%s: %r' % (head, cl.reachableroots(0, [head], [0])))
  > print('bad heads:')
  > for head in [len(cl), 10000, -2, -10000, None]:
  >     print('%s:' % head, end=' ')
  >     try:
  >         cl.reachableroots(0, [head], [0])
  >         print('uncaught buffer overflow?')
  >     except (IndexError, TypeError) as inst:
  >         print(inst)
  > print('good roots:')
  > for root in [0, len(cl) - 1, -1]:
  >     print('%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root])))
  > print('out-of-range roots are ignored:')
  > for root in [len(cl), 10000, -2, -10000]:
  >     print('%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root])))
  > print('bad roots:')
  > for root in [None]:
  >     print('%s:' % root, end=' ')
  >     try:
  >         cl.reachableroots(root, [len(cl) - 1], [root])
  >         print('uncaught error?')
  >     except TypeError as inst:
  >         print(inst)
  > EOF
  good heads:
  0: [0]
  1: [0]
  -1: []
  bad heads:
  2: head out of range
  10000: head out of range
  -2: head out of range
  -10000: head out of range
  None: an integer is required( .got type NoneType.)? (re)
  good roots:
  0: [0]
  1: [1]
  -1: [-1]
  out-of-range roots are ignored:
  2: []
  10000: []
  -2: []
  -10000: []
  bad roots:
  None: an integer is required( .got type NoneType.)? (re)

  $ cd ..

Test corrupted p1/p2 fields that could cause SEGV at parsers.c:

  $ mkdir invalidparent
  $ cd invalidparent

  $ hg clone --pull -q --config phases.publish=False ../a limit --config format.sparse-revlog=no
  $ hg clone --pull -q --config phases.publish=False ../a neglimit --config format.sparse-revlog=no
  $ hg clone --pull -q --config phases.publish=False ../a segv --config format.sparse-revlog=no
  $ rm -R limit/.hg/cache neglimit/.hg/cache segv/.hg/cache

  $ "$PYTHON" <<EOF
  > data = open("limit/.hg/store/00changelog.i", "rb").read()
  > poisons = [
  >     (b'limit', b'\0\0\0\x02'),
  >     (b'neglimit', b'\xff\xff\xff\xfe'),
  >     (b'segv', b'\0\x01\0\0'),
  > ]
  > for n, p in poisons:
  >     # corrupt p1 at rev0 and p2 at rev1
  >     d = data[:24] + p + data[28:127 + 28] + p + data[127 + 32:]
  >     open(n + b"/.hg/store/00changelog.i", "wb").write(d)
  > EOF

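To see why those slice offsets land on the parent fields: each v1 revlog
index entry is 64 bytes, with p1 at byte offset 24 and p2 at offset 28
within the entry. The index here is inline, so rev 0's entry is followed
immediately by its 63 bytes of revision data (the size 63 visible in the
delta chains below), putting rev 1's entry at byte 64 + 63 = 127 and its p2
field at bytes 127 + 28 through 127 + 32, which are exactly the two spans
the first and second copies of p overwrite.
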
  $ hg -R limit debugrevlogindex -f1 -c
     rev flag   size   link     p1     p2       nodeid
       0 0000     62      0      2     -1 7c31755bf9b5
       1 0000     65      1      0      2 26333235a41c

  $ hg -R limit debugdeltachain -c
      rev  chain# chainlen     prev   delta       size    rawsize  chainsize     ratio   lindist extradist extraratio
        0       1        1       -1    base         63         62         63   1.01613        63         0    0.00000
        1       2        1       -1    base         66         65         66   1.01538        66         0    0.00000

  $ hg -R neglimit debugrevlogindex -f1 -c
     rev flag   size   link     p1     p2       nodeid
       0 0000     62      0     -2     -1 7c31755bf9b5
       1 0000     65      1      0     -2 26333235a41c

  $ hg -R segv debugrevlogindex -f1 -c
     rev flag   size   link     p1     p2       nodeid
       0 0000     62      0  65536     -1 7c31755bf9b5
       1 0000     65      1      0  65536 26333235a41c

  $ hg -R segv debugdeltachain -c
      rev  chain# chainlen     prev   delta       size    rawsize  chainsize     ratio   lindist extradist extraratio
        0       1        1       -1    base         63         62         63   1.01613        63         0    0.00000
        1       2        1       -1    base         66         65         66   1.01538        66         0    0.00000

  $ cat <<EOF > test.py
  > from __future__ import print_function
  > import sys
  > from mercurial import changelog, pycompat, vfs
  > cl = changelog.changelog(vfs.vfs(pycompat.fsencode(sys.argv[1])))
  > n0, n1 = cl.node(0), cl.node(1)
  > ops = [
  >     ('reachableroots',
  >      lambda: cl.index.reachableroots2(0, [1], [0], False)),
  >     ('compute_phases_map_sets', lambda: cl.computephases([[0], []])),
  >     ('index_headrevs', lambda: cl.headrevs()),
  >     ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),
  >     ('find_deepest', lambda: cl.ancestor(n0, n1)),
  > ]
  > for l, f in ops:
  >     print(l + ':', end=' ')
  >     try:
  >         f()
  >         print('uncaught buffer overflow?')
  >     except ValueError as inst:
  >         print(inst)
  > EOF

  $ "$PYTHON" test.py limit/.hg/store
  reachableroots: parent out of range
  compute_phases_map_sets: parent out of range
  index_headrevs: parent out of range
  find_gca_candidates: parent out of range
  find_deepest: parent out of range
  $ "$PYTHON" test.py neglimit/.hg/store
  reachableroots: parent out of range
  compute_phases_map_sets: parent out of range
  index_headrevs: parent out of range
  find_gca_candidates: parent out of range
  find_deepest: parent out of range
  $ "$PYTHON" test.py segv/.hg/store
  reachableroots: parent out of range
  compute_phases_map_sets: parent out of range
  index_headrevs: parent out of range
  find_gca_candidates: parent out of range
  find_deepest: parent out of range

  $ cd ..

#endif