revlog: add function to slice chunk down to a given size...
Boris Feld
r38664:e59e27e5 default
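The patch introduces _slicechunktosize(), complementing the existing
density-based slicing: a group selected by _slicechunktodensity can still
span more bytes than a single read should cover, so it is re-sliced against
an explicit byte budget. A minimal standalone sketch of the idea (a
simplification for illustration, not the module's code, which also trims
empty trailing revisions via _trimchunk):

def slicetosize(starts, ends, revs, targetsize):
    # Yield groups of sorted revs whose on-disk byte span stays <= targetsize.
    group = [revs[0]]
    groupstart = starts[revs[0]]
    for rev in revs[1:]:
        if ends[rev] - groupstart <= targetsize:
            group.append(rev)        # still within the read budget
        else:
            yield group              # close the group, start a new one
            group = [rev]
            groupstart = starts[rev]
    yield group

# three revisions of 3, 2 and 1 bytes; cap each read at a 3-byte span
starts = {0: 0, 1: 3, 2: 5}
ends = {0: 3, 1: 5, 2: 6}
print(list(slicetosize(starts, ends, [0, 1, 2], 3)))  # [[0], [1, 2]]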
@@ -1,2798 +1,2875 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmark for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)
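# Illustration (not part of revlog.py): packmeta() and parsemeta() are
# inverses; the metadata block travels between two "\1\n" markers placed
# in front of the revision text:
#
#   packmeta({'censored': 'foo'}, 'text')
#       -> '\x01\ncensored: foo\n\x01\ntext'
#   parsemeta(packmeta({'censored': 'foo'}, 'text'))
#       -> ({'censored': 'foo'}, 18)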

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate the hash. If
    "write" returns True, the "rawtext" returned by "write" should be used
    to generate the hash. Usually, "write" and "read" return different
    booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
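# Illustrative call shape (readfn/writefn/rawfn are hypothetical names,
# shown only to spell out the 3-tuple documented above):
#
#   addflagprocessor(REVIDX_EXTSTORED,
#                    (readfn,     # f(self, rawtext) -> (text, bool)
#                     writefn,    # f(self, text) -> (rawtext, bool)
#                     rawfn))     # f(self, rawtext) -> bool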

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
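# Illustration: an index entry packs the data offset and the flag bits
# into a single integer; getoffset() and gettype() invert offset_type():
#
#   q = offset_type(1024, REVIDX_ISCENSORED)   # (1024 << 16) | (1 << 15)
#   getoffset(q) == 1024
#   gettype(q) == REVIDX_ISCENSORED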

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
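# Illustration: with a null p2 the digest is sha1(nullid + p1 + text),
# so for an empty text and null parents:
#
#   hash('', nullid, nullid) == hashlib.sha1(nullid * 2).digest()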

class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    return revlog.end(revs[-1]) - revlog.start(revs[0])

def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, range(16)))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    for chunk in _slicechunktodensity(revlog, revs,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        yield chunk

def _slicechunktosize(revlog, revs, targetsize):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected,
    but that are still too large compared to the read guarantee of revlogs.
    This might happen when the "minimal gap size" interrupted the slicing,
    or when chains are built in a way that creates large blocks next to
    each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where the chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    Cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Cases where a single revision is larger than the target size (must
    still return valid chunks)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, range(16)))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    readdata = deltachainspan = _segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are interesting to diff against, grouped by
        level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._aggressivemergedeltas and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
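# Illustration: both index entry formats above are fixed-size records,
# which struct.calcsize confirms:
#
#   indexformatv0.size == 76   # 4*4 + 3*20
#   indexformatng.size == 64   # 8 + 6*4 + 20 + 12 bytes of padding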
717
794
718 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
795 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
719 # signed integer)
796 # signed integer)
720 _maxentrysize = 0x7fffffff
797 _maxentrysize = 0x7fffffff
721
798
722 class revlogio(object):
799 class revlogio(object):
723 def __init__(self):
800 def __init__(self):
724 self.size = indexformatng.size
801 self.size = indexformatng.size
725
802
726 def parseindex(self, data, inline):
803 def parseindex(self, data, inline):
727 # call the C implementation to parse the index data
804 # call the C implementation to parse the index data
728 index, cache = parsers.parse_index2(data, inline)
805 index, cache = parsers.parse_index2(data, inline)
729 return index, getattr(index, 'nodemap', None), cache
806 return index, getattr(index, 'nodemap', None), cache
730
807
731 def packentry(self, entry, node, version, rev):
808 def packentry(self, entry, node, version, rev):
732 p = indexformatng_pack(*entry)
809 p = indexformatng_pack(*entry)
733 if rev == 0:
810 if rev == 0:
734 p = versionformat_pack(version) + p[4:]
811 p = versionformat_pack(version) + p[4:]
735 return p
812 return p
736
813
737 class revlog(object):
814 class revlog(object):
738 """
815 """
739 the underlying revision storage object
816 the underlying revision storage object
740
817
741 A revlog consists of two parts, an index and the revision data.
818 A revlog consists of two parts, an index and the revision data.
742
819
743 The index is a file with a fixed record size containing
820 The index is a file with a fixed record size containing
744 information on each revision, including its nodeid (hash), the
821 information on each revision, including its nodeid (hash), the
745 nodeids of its parents, the position and offset of its data within
822 nodeids of its parents, the position and offset of its data within
746 the data file, and the revision it's based on. Finally, each entry
823 the data file, and the revision it's based on. Finally, each entry
747 contains a linkrev entry that can serve as a pointer to external
824 contains a linkrev entry that can serve as a pointer to external
748 data.
825 data.
749
826
750 The revision data itself is a linear collection of data chunks.
827 The revision data itself is a linear collection of data chunks.
751 Each chunk represents a revision and is usually represented as a
828 Each chunk represents a revision and is usually represented as a
752 delta against the previous chunk. To bound lookup time, runs of
829 delta against the previous chunk. To bound lookup time, runs of
753 deltas are limited to about 2 times the length of the original
830 deltas are limited to about 2 times the length of the original
754 version data. This makes retrieval of a version proportional to
831 version data. This makes retrieval of a version proportional to
755 its size, or O(1) relative to the number of revisions.
832 its size, or O(1) relative to the number of revisions.
756
833
757 Both pieces of the revlog are written to in an append-only
834 Both pieces of the revlog are written to in an append-only
758 fashion, which means we never need to rewrite a file to insert or
835 fashion, which means we never need to rewrite a file to insert or
759 remove data, and can use some simple techniques to avoid the need
836 remove data, and can use some simple techniques to avoid the need
760 for locking while reading.
837 for locking while reading.
761
838
762 If checkambig, indexfile is opened with checkambig=True at
839 If checkambig, indexfile is opened with checkambig=True at
763 writing, to avoid file stat ambiguity.
840 writing, to avoid file stat ambiguity.
764
841
765 If mmaplargeindex is True, and an mmapindexthreshold is set, the
842 If mmaplargeindex is True, and an mmapindexthreshold is set, the
766 index will be mmapped rather than read if it is larger than the
843 index will be mmapped rather than read if it is larger than the
767 configured threshold.
844 configured threshold.
768
845
769 If censorable is True, the revlog can have censored revisions.
846 If censorable is True, the revlog can have censored revisions.
770 """
847 """
771 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
848 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
772 mmaplargeindex=False, censorable=False):
849 mmaplargeindex=False, censorable=False):
773 """
850 """
774 create a revlog object
851 create a revlog object
775
852
776 opener is a function that abstracts the file opening operation
853 opener is a function that abstracts the file opening operation
777 and can be used to implement COW semantics or the like.
854 and can be used to implement COW semantics or the like.
778 """
855 """
779 self.indexfile = indexfile
856 self.indexfile = indexfile
780 self.datafile = datafile or (indexfile[:-2] + ".d")
857 self.datafile = datafile or (indexfile[:-2] + ".d")
781 self.opener = opener
858 self.opener = opener
782 # When True, indexfile is opened with checkambig=True at writing, to
859 # When True, indexfile is opened with checkambig=True at writing, to
783 # avoid file stat ambiguity.
860 # avoid file stat ambiguity.
784 self._checkambig = checkambig
861 self._checkambig = checkambig
785 self._censorable = censorable
862 self._censorable = censorable
786 # 3-tuple of (node, rev, text) for a raw revision.
863 # 3-tuple of (node, rev, text) for a raw revision.
787 self._cache = None
864 self._cache = None
788 # Maps rev to chain base rev.
865 # Maps rev to chain base rev.
789 self._chainbasecache = util.lrucachedict(100)
866 self._chainbasecache = util.lrucachedict(100)
790 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
867 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
791 self._chunkcache = (0, '')
868 self._chunkcache = (0, '')
792 # How much data to read and cache into the raw revlog data cache.
869 # How much data to read and cache into the raw revlog data cache.
793 self._chunkcachesize = 65536
870 self._chunkcachesize = 65536
794 self._maxchainlen = None
871 self._maxchainlen = None
795 self._aggressivemergedeltas = True
872 self._aggressivemergedeltas = True
796 self.index = []
873 self.index = []
797 # Mapping of partial identifiers to full nodes.
874 # Mapping of partial identifiers to full nodes.
798 self._pcache = {}
875 self._pcache = {}
799 # Mapping of revision integer to full node.
876 # Mapping of revision integer to full node.
800 self._nodecache = {nullid: nullrev}
877 self._nodecache = {nullid: nullrev}
801 self._nodepos = None
878 self._nodepos = None
802 self._compengine = 'zlib'
879 self._compengine = 'zlib'
803 self._maxdeltachainspan = -1
880 self._maxdeltachainspan = -1
804 self._withsparseread = False
881 self._withsparseread = False
805 self._srdensitythreshold = 0.50
882 self._srdensitythreshold = 0.50
806 self._srmingapsize = 262144
883 self._srmingapsize = 262144
807
884
808 mmapindexthreshold = None
885 mmapindexthreshold = None
809 v = REVLOG_DEFAULT_VERSION
886 v = REVLOG_DEFAULT_VERSION
810 opts = getattr(opener, 'options', None)
887 opts = getattr(opener, 'options', None)
811 if opts is not None:
888 if opts is not None:
812 if 'revlogv2' in opts:
889 if 'revlogv2' in opts:
813 # version 2 revlogs always use generaldelta.
890 # version 2 revlogs always use generaldelta.
814 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
891 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
815 elif 'revlogv1' in opts:
892 elif 'revlogv1' in opts:
816 if 'generaldelta' in opts:
893 if 'generaldelta' in opts:
817 v |= FLAG_GENERALDELTA
894 v |= FLAG_GENERALDELTA
818 else:
895 else:
819 v = 0
896 v = 0
820 if 'chunkcachesize' in opts:
897 if 'chunkcachesize' in opts:
821 self._chunkcachesize = opts['chunkcachesize']
898 self._chunkcachesize = opts['chunkcachesize']
822 if 'maxchainlen' in opts:
899 if 'maxchainlen' in opts:
823 self._maxchainlen = opts['maxchainlen']
900 self._maxchainlen = opts['maxchainlen']
824 if 'aggressivemergedeltas' in opts:
901 if 'aggressivemergedeltas' in opts:
825 self._aggressivemergedeltas = opts['aggressivemergedeltas']
902 self._aggressivemergedeltas = opts['aggressivemergedeltas']
826 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
903 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
827 if 'compengine' in opts:
904 if 'compengine' in opts:
828 self._compengine = opts['compengine']
905 self._compengine = opts['compengine']
829 if 'maxdeltachainspan' in opts:
906 if 'maxdeltachainspan' in opts:
830 self._maxdeltachainspan = opts['maxdeltachainspan']
907 self._maxdeltachainspan = opts['maxdeltachainspan']
831 if mmaplargeindex and 'mmapindexthreshold' in opts:
908 if mmaplargeindex and 'mmapindexthreshold' in opts:
832 mmapindexthreshold = opts['mmapindexthreshold']
909 mmapindexthreshold = opts['mmapindexthreshold']
833 self._withsparseread = bool(opts.get('with-sparse-read', False))
910 self._withsparseread = bool(opts.get('with-sparse-read', False))
834 if 'sparse-read-density-threshold' in opts:
911 if 'sparse-read-density-threshold' in opts:
835 self._srdensitythreshold = opts['sparse-read-density-threshold']
912 self._srdensitythreshold = opts['sparse-read-density-threshold']
836 if 'sparse-read-min-gap-size' in opts:
913 if 'sparse-read-min-gap-size' in opts:
837 self._srmingapsize = opts['sparse-read-min-gap-size']
914 self._srmingapsize = opts['sparse-read-min-gap-size']
838
915
839 if self._chunkcachesize <= 0:
916 if self._chunkcachesize <= 0:
840 raise RevlogError(_('revlog chunk cache size %r is not greater '
917 raise RevlogError(_('revlog chunk cache size %r is not greater '
841 'than 0') % self._chunkcachesize)
918 'than 0') % self._chunkcachesize)
842 elif self._chunkcachesize & (self._chunkcachesize - 1):
919 elif self._chunkcachesize & (self._chunkcachesize - 1):
843 raise RevlogError(_('revlog chunk cache size %r is not a power '
920 raise RevlogError(_('revlog chunk cache size %r is not a power '
844 'of 2') % self._chunkcachesize)
921 'of 2') % self._chunkcachesize)
845
922
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

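# Editor's sketch (not part of revlog.py): the first four bytes of an index
# are a big-endian u32 whose low 16 bits carry the format (REVLOGV0/V1/V2)
# and whose high 16 bits carry feature flags, matching the masks used above.
# The underscored constants are invented stand-ins for the module-level ones:
import struct

_REVLOGV1 = 1
_FLAG_INLINE_DATA = 1 << 16
_FLAG_GENERALDELTA = 1 << 17

_header = struct.pack(">I", _REVLOGV1 | _FLAG_INLINE_DATA | _FLAG_GENERALDELTA)
_v = struct.unpack(">I", _header)[0]
assert _v & 0xFFFF == _REVLOGV1                                # format: low bits
assert _v & ~0xFFFF == _FLAG_INLINE_DATA | _FLAG_GENERALDELTA  # flags: high bits
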
        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
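
# Editor's sketch (not part of revlog.py): revs() above makes `stop`
# inclusive by nudging it one step in the direction of iteration, which is
# how a descending request (start > stop) works without special-casing the
# range bounds. A standalone rendering of the same arithmetic, with an
# invented helper name:
def _inclusive_range(start, stop):
    step = 1
    if start > stop:
        step = -1  # iterate backwards, as revs() does when start > stop
    return range(start, stop + step, step)

assert list(_inclusive_range(2, 5)) == [2, 3, 4, 5]
assert list(_inclusive_range(5, 2)) == [5, 4, 3, 2]  # reversed iteration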

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            else:
                assert p < len(i)
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

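# Editor's sketch (not part of revlog.py): the first entry of an index tuple
# packs a 48-bit offset into the data file together with the 16 per-revision
# flag bits, which start() and flags() above unpack by shifting and masking.
# The helper name below is invented for illustration:
def _pack_offset_flags(offset, flags):
    return (offset << 16) | flags

entry0 = _pack_offset_flags(1234, 1 << 15)   # e.g. the REVIDX_ISCENSORED bit
assert entry0 >> 16 == 1234                  # what start() would return
assert entry0 & 0xFFFF == 1 << 15            # what flags() would return
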
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

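# Editor's sketch (not part of revlog.py): the pure-Python fallback above
# follows the delta-base column (tuple entry 3) until it finds a revision
# that is its own base, i.e. a full snapshot. With generaldelta the base is
# read from the entry; otherwise it is implicitly the previous revision. A
# toy generaldelta walk over an invented three-entry index:
# (offset_flags, complen, rawlen, deltabase, linkrev, p1, p2, node)
_toyindex = [
    (0, 0, 0, 0, 0, -1, -1, 'n0'),  # rev 0: snapshot (its own base)
    (0, 0, 0, 0, 1, 0, -1, 'n1'),   # rev 1: delta against rev 0
    (0, 0, 0, 1, 2, 1, -1, 'n2'),   # rev 2: delta against rev 1
]

def _toy_deltachain(index, rev):
    chain = []
    e = index[rev]
    while rev != e[3]:      # stop once a revision is its own delta base
        chain.append(rev)
        rev = e[3]
        e = index[rev]
    chain.append(rev)
    chain.reverse()         # oldest first, like _deltachain() above
    return chain

assert _toy_deltachain(_toyindex, 2) == [0, 1, 2]
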
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

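# Editor's sketch (not part of revlog.py): descendants() above sweeps revs
# in ascending order and marks a rev as soon as one of its parents is
# already marked, which is sound because parents always have smaller
# revision numbers. A standalone rendering over an invented parent table
# (-1 plays the role of nullrev):
_toyparents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (2, -1)}

def _toy_descendants(parentrevs, revs, maxrev):
    seen = set(revs)
    for i in range(min(revs) + 1, maxrev + 1):
        if any(p != -1 and p in seen for p in parentrevs[i]):
            seen.add(i)
            yield i

# rev 3 is the only descendant of rev 2; rev 1 merely shares a parent
assert list(_toy_descendants(_toyparents, {2}, 3)) == [3]
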
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied,
        uses nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

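# Editor's sketch (not part of revlog.py): _headrevs() above starts by
# assuming every rev is a head, then clears the mark of anything that
# appears as a parent; whatever is still marked has no children. The same
# idea over an invented parent table:
_toyheadsparents = [(-1, -1), (0, -1), (0, -1)]  # revs 1 and 2 branch off 0

def _toy_headrevs(parents):
    ishead = [1] * len(parents)
    for r, (p1, p2) in enumerate(parents):
        for p in (p1, p2):
            if p != -1:
                ishead[p] = 0   # a rev with a child cannot be a head
    return [r for r, v in enumerate(ishead) if v]

assert _toy_headrevs(_toyheadsparents) == [1, 2]  # both branch tips
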
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        """True if revision 'end' is a descendant of revision 'start'

        A revision is considered a descendant of itself."""
        if start == nullrev:
            return True
        elif start == end:
            return True
        return start in self._commonancestorsheads(start, end)

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        a, b = self.rev(a), self.rev(b)
        return self.descendant(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

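# Editor's sketch (not part of revlog.py): ancestor() above delegates the
# real work to the ancestor module or its C counterpart. Conceptually the
# common ancestors of a and b are the intersection of their ancestor sets,
# and the method then picks a deterministic winner among the best ones. A
# brute-force version over an invented parent table, where each rev counts
# as its own ancestor:
_toyancparents = [(-1, -1), (0, -1), (0, -1), (1, 2)]  # rev 3 merges 1 and 2

def _toy_ancset(parents, rev):
    seen, stack = set(), [rev]
    while stack:
        r = stack.pop()
        if r != -1 and r not in seen:
            seen.add(r)
            stack.extend(parents[r])
    return seen

common = _toy_ancset(_toyancparents, 1) & _toy_ancset(_toyancparents, 2)
# the highest-numbered rev in the intersection is one head of that set;
# the real code breaks ties between heads by smallest node instead
assert max(common) == 0
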
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

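# Editor's sketch (not part of revlog.py): shortest() above probes prefixes
# of increasing length until exactly one known node matches (and the real
# code additionally refuses prefixes of the virtual working-directory id,
# all 'f's). The same probing over an invented list of short hex "nodes":
_toyhexnodes = ['a1b2c3', 'a1f9d0', '9c81aa']

def _toy_shortest(nodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(n.startswith(prefix) for n in nodes) == 1:
            return prefix

assert _toy_shortest(_toyhexnodes, 'a1b2c3') == 'a1b'  # 'a1' is ambiguous
assert _toy_shortest(_toyhexnodes, '9c81aa') == '9'    # unique first digit
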
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

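    # Worked example of the window arithmetic above (assuming the default
    # 64 KiB chunk cache size): a request for offset=70000, length=100 gives
    #   realoffset = 70000 & ~65535                           -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 -> 65536
    # so one aligned 64 KiB read at 65536 covers the requested bytes plus
    # their neighborhood, and a buffer view at offset 70000 - 65536 = 4464
    # with length 100 is returned.
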
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

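    # Sketch of how a caller cuts individual revisions out of the returned
    # segment (illustrative only; ``rl`` is a hypothetical revlog instance,
    # and the inline case additionally skips interleaved index entries, as
    # done in _chunks below):
    #
    #   offset, data = rl._getsegmentforrevs(startrev, endrev)
    #   for rev in range(startrev, endrev + 1):
    #       chunkstart = rl.start(rev)       # absolute offset of this rev
    #       chunklength = rl.length(rev)     # on-disk size of this rev
    #       chunk = util.buffer(data, chunkstart - offset, chunklength)
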
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

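    # Example of the sparse-read path (illustrative): with sparse-read
    # enabled, a request such as revs = [2, 3, 90, 91] may be sliced by
    # _slicechunk into [[2, 3], [90, 91]] so that two small reads replace
    # one read spanning the large unused gap between rev 3 and rev 90.
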
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

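    # Illustrative example: if the index records base=5 for rev 7, a
    # generaldelta revlog reports deltaparent(7) == 5 (deltas may point at
    # any earlier revision), while a non-generaldelta revlog reports 6,
    # because classic revlogs only delta against the immediately preceding
    # revision. A base equal to the revision itself marks a full snapshot,
    # reported as nullrev.
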
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

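    # Reconstruction sketch (illustrative): for a delta chain such as
    # chain = [4, 6, 9], where rev 4 is a full snapshot, the code above is
    # equivalent to
    #
    #   chunks = self._chunks([4, 6, 9])
    #   rawtext = mdiff.patches(bytes(chunks[0]), chunks[1:])
    #
    # i.e. the snapshot is patched with each successive delta to produce
    # the raw text of rev 9.
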
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not
        commutative.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

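    # Example flag-processor flow (illustrative, with hypothetical flags A
    # and B): given REVIDX_FLAGS_ORDER = [A, B] and a revision carrying
    # both flags, a 'read' applies A's readtransform and then B's, while a
    # 'write' applies B's writetransform and then A's, so that write is the
    # exact inverse of read even when the transforms do not commute.
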
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

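    # Example return values (illustrative): compressible input comes back
    # as ('', <compressed bytes carrying the engine's header>);
    # incompressible input is stored literally as ('u', data); and data
    # that already starts with the '\0' header byte is returned as
    # ('', data), since '\0' cannot be mistaken for the 'u' marker.
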
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

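    # Header dispatch summary (illustrative): 'x' is the first byte of a
    # zlib stream and is inflated inline; '\0' marks data stored literally;
    # 'u' marks uncompressed data behind a one-byte prefix that is sliced
    # off; any other byte (for instance '\x28' for the zstd engine, where
    # available) is looked up among the registered compression engines.
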
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = deltainfo.distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
            deltainfo.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
            return False

        return True

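    # Worked example (hypothetical numbers, assuming no explicit
    # maxdeltachainspan or maxchainlen limits): for a 1000-byte revision
    # (textlen=1000) the default span bound is 4 * 1000 = 4000 bytes, so a
    # candidate with distance=3500, deltalen=400, compresseddeltalen=1500
    # passes all checks, while one with deltalen=1200 is rejected because
    # the delta is larger than the text it encodes.
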
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the rest
        are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

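    # Layout assumed by the check above (illustrative): a full-replacement
    # delta is a single patch hunk, i.e. a ">lll" header (start, end,
    # length of new data -- 12 bytes) followed by the new text, so the
    # bytes immediately after the header are the start of the replacement
    # revision and can be tested for the "\1\ncensored:" metadata prefix.
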
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

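    # Walkthrough (hypothetical linkrevs): consider a revlog with three
    # revisions whose linkrevs are [0, 3, 2] -- two branches touched the
    # file in opposite changelog order -- and minlink=3. The walk pops
    # rev 2 first: its linkrev 2 is below minlink, yet it lies above the
    # eventual truncation point, so it lands in brokenrevs. Rev 1
    # (linkrev 3) is the first rev that must go, giving strippoint=1 and
    # brokenrevs={2}; the caller is expected to save rev 2 and re-add it
    # after the truncation.
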
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            # separate data file: truncate it at the stripped revision's
            # start offset, and the index at the matching entry boundary
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            # inline revlog: index entries and revision data share one file
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

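    # A minimal usage sketch for the two methods above (hedged: ``rl`` and
    # ``tr`` are assumed to be an existing revlog and an open transaction;
    # per the docstring, the caller must have backed up the revisions that
    # should survive before truncating):
    #
    #   rev, broken = rl.getstrippoint(minlink)   # preview the cut point
    #   rl.strip(minlink, tr)                     # truncate at that point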
    def checksize(self):
        """check the on-disk sizes of the index and data files

        Returns a (dd, di) tuple: dd is the number of bytes by which the
        data file differs from the size recorded in the index, and di is
        the number of index file bytes not accounted for by whole index
        entries (plus, for inline revlogs, their revision data). Both are
        0 for a consistent revlog.
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

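    # A minimal sketch of interpreting checksize() (hedged: ``rl`` is assumed
    # to be an open revlog instance; verification-style checks build on this):
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       pass  # on-disk sizes disagree with the index; run verify/repair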
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added through the full ``addrevision()`` code
          path instead of the low-level ``_addrevision()`` one.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        When not set, the destination revlog's current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
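
    # A hedged usage sketch for clone(): ``srclog`` and ``destlog`` are
    # assumed to be an existing revlog and a freshly created, empty one, and
    # ``tr`` an open transaction. Forcing delta recomputation while copying
    # (e.g. after a delta algorithm change, per the docstring above) might
    # look like:
    #
    #   srclog.clone(tr, destlog, deltareuse=srclog.DELTAREUSENEVER)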