revlog: micro-optimize the computation of hashes...
Alex Gaynor
r33391:943b8c37 default
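The commit replaces a list allocation plus sort() with a single comparison when ordering the two parent hashes before feeding them to SHA-1. A minimal sketch of the idea follows (not part of the commit; the function names, stand-in nodeids, and timeit harness are hypothetical, for illustration only):

import hashlib
import timeit

p1 = b'\x11' * 20  # stand-in 20-byte parent nodeids for illustration
p2 = b'\x22' * 20
text = b'file contents'

def node_sorted(text, p1, p2):
    # before: build a two-element list and sort it to order the parents
    l = [p1, p2]
    l.sort()
    s = hashlib.sha1(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

def node_compared(text, p1, p2):
    # after: one comparison, no list allocation or sort() call
    if p1 < p2:
        a, b = p1, p2
    else:
        a, b = p2, p1
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

# both variants hash the smaller parent first, so the digests agree
assert node_sorted(text, p1, p2) == node_compared(text, p1, p2)
print(timeit.timeit(lambda: node_sorted(text, p1, p2)))
print(timeit.timeit(lambda: node_compared(text, p1, p2)))

Because the ordering is unchanged, the on-disk nodeids are identical; only the per-call overhead of this hot function differs.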
@@ -1,2208 +1,2212 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)

parsers = policy.importmod(r'parsers')

_pack = struct.pack
_unpack = struct.unpack
# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns a same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
-        l = [p1, p2]
-        l.sort()
-        s = hashlib.sha1(l[0])
-        s.update(l[1])
+        if p1 < p2:
+            a = p1
+            b = p2
+        else:
+            a = p2
+            b = p1
+        s = hashlib.sha1(a)
+        s.update(b)
    s.update(text)
    return s.digest()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            return self.index[rev][5:7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

765 def nodesbetween(self, roots=None, heads=None):
769 def nodesbetween(self, roots=None, heads=None):
766 """Return a topological path from 'roots' to 'heads'.
770 """Return a topological path from 'roots' to 'heads'.
767
771
768 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
772 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
769 topologically sorted list of all nodes N that satisfy both of
773 topologically sorted list of all nodes N that satisfy both of
770 these constraints:
774 these constraints:
771
775
772 1. N is a descendant of some node in 'roots'
776 1. N is a descendant of some node in 'roots'
773 2. N is an ancestor of some node in 'heads'
777 2. N is an ancestor of some node in 'heads'
774
778
775 Every node is considered to be both a descendant and an ancestor
779 Every node is considered to be both a descendant and an ancestor
776 of itself, so every reachable node in 'roots' and 'heads' will be
780 of itself, so every reachable node in 'roots' and 'heads' will be
777 included in 'nodes'.
781 included in 'nodes'.
778
782
779 'outroots' is the list of reachable nodes in 'roots', i.e., the
783 'outroots' is the list of reachable nodes in 'roots', i.e., the
780 subset of 'roots' that is returned in 'nodes'. Likewise,
784 subset of 'roots' that is returned in 'nodes'. Likewise,
781 'outheads' is the subset of 'heads' that is also in 'nodes'.
785 'outheads' is the subset of 'heads' that is also in 'nodes'.
782
786
783 'roots' and 'heads' are both lists of node IDs. If 'roots' is
787 'roots' and 'heads' are both lists of node IDs. If 'roots' is
784 unspecified, uses nullid as the only root. If 'heads' is
788 unspecified, uses nullid as the only root. If 'heads' is
785 unspecified, uses list of all of the revlog's heads."""
789 unspecified, uses list of all of the revlog's heads."""
786 nonodes = ([], [], [])
790 nonodes = ([], [], [])
787 if roots is not None:
791 if roots is not None:
788 roots = list(roots)
792 roots = list(roots)
789 if not roots:
793 if not roots:
790 return nonodes
794 return nonodes
791 lowestrev = min([self.rev(n) for n in roots])
795 lowestrev = min([self.rev(n) for n in roots])
792 else:
796 else:
793 roots = [nullid] # Everybody's a descendant of nullid
797 roots = [nullid] # Everybody's a descendant of nullid
794 lowestrev = nullrev
798 lowestrev = nullrev
795 if (lowestrev == nullrev) and (heads is None):
799 if (lowestrev == nullrev) and (heads is None):
796 # We want _all_ the nodes!
800 # We want _all_ the nodes!
797 return ([self.node(r) for r in self], [nullid], list(self.heads()))
801 return ([self.node(r) for r in self], [nullid], list(self.heads()))
798 if heads is None:
802 if heads is None:
799 # All nodes are ancestors, so the latest ancestor is the last
803 # All nodes are ancestors, so the latest ancestor is the last
800 # node.
804 # node.
801 highestrev = len(self) - 1
805 highestrev = len(self) - 1
802 # Set ancestors to None to signal that every node is an ancestor.
806 # Set ancestors to None to signal that every node is an ancestor.
803 ancestors = None
807 ancestors = None
804 # Set heads to an empty dictionary for later discovery of heads
808 # Set heads to an empty dictionary for later discovery of heads
805 heads = {}
809 heads = {}
806 else:
810 else:
807 heads = list(heads)
811 heads = list(heads)
808 if not heads:
812 if not heads:
809 return nonodes
813 return nonodes
810 ancestors = set()
814 ancestors = set()
811 # Turn heads into a dictionary so we can remove 'fake' heads.
815 # Turn heads into a dictionary so we can remove 'fake' heads.
812 # Also, later we will be using it to filter out the heads we can't
816 # Also, later we will be using it to filter out the heads we can't
813 # find from roots.
817 # find from roots.
814 heads = dict.fromkeys(heads, False)
818 heads = dict.fromkeys(heads, False)
815 # Start at the top and keep marking parents until we're done.
819 # Start at the top and keep marking parents until we're done.
816 nodestotag = set(heads)
820 nodestotag = set(heads)
817 # Remember where the top was so we can use it as a limit later.
821 # Remember where the top was so we can use it as a limit later.
818 highestrev = max([self.rev(n) for n in nodestotag])
822 highestrev = max([self.rev(n) for n in nodestotag])
819 while nodestotag:
823 while nodestotag:
820 # grab a node to tag
824 # grab a node to tag
821 n = nodestotag.pop()
825 n = nodestotag.pop()
822 # Never tag nullid
826 # Never tag nullid
823 if n == nullid:
827 if n == nullid:
824 continue
828 continue
825 # A node's revision number represents its place in a
829 # A node's revision number represents its place in a
826 # topologically sorted list of nodes.
830 # topologically sorted list of nodes.
827 r = self.rev(n)
831 r = self.rev(n)
828 if r >= lowestrev:
832 if r >= lowestrev:
829 if n not in ancestors:
833 if n not in ancestors:
830 # If we are possibly a descendant of one of the roots
834 # If we are possibly a descendant of one of the roots
831 # and we haven't already been marked as an ancestor
835 # and we haven't already been marked as an ancestor
832 ancestors.add(n) # Mark as ancestor
836 ancestors.add(n) # Mark as ancestor
833 # Add non-nullid parents to list of nodes to tag.
837 # Add non-nullid parents to list of nodes to tag.
834 nodestotag.update([p for p in self.parents(n) if
838 nodestotag.update([p for p in self.parents(n) if
835 p != nullid])
839 p != nullid])
836 elif n in heads: # We've seen it before, is it a fake head?
840 elif n in heads: # We've seen it before, is it a fake head?
837 # So it is, real heads should not be the ancestors of
841 # So it is, real heads should not be the ancestors of
838 # any other heads.
842 # any other heads.
839 heads.pop(n)
843 heads.pop(n)
840 if not ancestors:
844 if not ancestors:
841 return nonodes
845 return nonodes
842 # Now that we have our set of ancestors, we want to remove any
846 # Now that we have our set of ancestors, we want to remove any
843 # roots that are not ancestors.
847 # roots that are not ancestors.
844
848
845 # If one of the roots was nullid, everything is included anyway.
849 # If one of the roots was nullid, everything is included anyway.
846 if lowestrev > nullrev:
850 if lowestrev > nullrev:
847 # But, since we weren't, let's recompute the lowest rev to not
851 # But, since we weren't, let's recompute the lowest rev to not
848 # include roots that aren't ancestors.
852 # include roots that aren't ancestors.
849
853
850 # Filter out roots that aren't ancestors of heads
854 # Filter out roots that aren't ancestors of heads
851 roots = [root for root in roots if root in ancestors]
855 roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant.  (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached.
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
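
    # Illustrative walk-through of _headrevs (editor's note, not in the
    # original source): for a graph 0 <- 1 <- 2 plus a branch 1 <- 3, every
    # visited rev starts with ishead = 1; rev 1 clears rev 0, and revs 2
    # and 3 both clear rev 1, leaving [2, 3]. The extra trailing slot in
    # ishead harmlessly absorbs writes for nullrev (-1) parents.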

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]
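
    # Illustrative example (editor's note, not in the original source): for
    # a linear history 0 <- 1 <- 2, heads() returns [node(2)], while
    # heads(start=node(0), stop=[node(1)]) treats rev 1 as childless, so
    # rev 2 never becomes reachable and [node(1)] is returned.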

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
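
    # Accepted identifier forms above (editor's summary, not in the original
    # source): an int revision, a 20-byte binary node, a decimal string such
    # as '42' or '-1', or a 40-character hex nodeid; anything else falls
    # through to _partialmatch() below.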

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
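        # Worked example (editor's note; assumes a chunk cache size of
        # 65536, the usual default): offset=70000, length=100 gives
        # realoffset = 70000 & ~65535 = 65536 and
        # reallength = ((70100 + 65536) & ~65535) - 65536 = 65536, i.e. one
        # aligned 64KB window that contains the requested span.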
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

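        # In an inline revlog, index entries and data chunks are interleaved,
        # so rev N's data sits behind N + 1 index entries (one per rev up to
        # and including N). The shift below turns logical data offsets into
        # file offsets. (Editor's note, not in the original: e.g. with a
        # 64-byte v1 index entry, rev 0's data starts 64 bytes into the file.)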
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        decomp = self.decompress
        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
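
    # Editor's sketch of the read path above (not in the original source):
    # revision(node) resolves the rev, walks its delta chain via
    # _deltachain(), decompresses every chunk in one pass with _chunks(),
    # folds the deltas together with mdiff.patches(), then runs any
    # registered flag processors before (optionally) verifying the hash.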

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else:  # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash
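
    # Editor's illustration (not in the original source): if a revision
    # carries two processed flags, their transforms run in REVIDX_FLAGS_ORDER
    # on 'read' and in the reverse order on 'write', so a read exactly
    # unwinds what the write applied even when the transforms don't commute.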

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, this converts it
        to use separate index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data
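
    # Editor's note on the return convention above (not in the original
    # source): compress() yields a (header, data) pair -- ('', compressed)
    # when the engine produced a self-identifying stream, ('', data) when
    # the raw text already starts with '\0' (decompress passes it through),
    # and ('u', data) to tag uncompressed text that would otherwise be
    # ambiguous.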

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]
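
        # Header byte dispatch (editor's summary, not in the original
        # source): 'x' marks a zlib stream (zlib output happens to begin
        # with 0x78, ASCII 'x'), '\0' marks data stored raw, 'u' is an
        # explicit uncompressed marker that gets stripped, and any other
        # byte is resolved through the registered compression engines
        # (e.g. zstd).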

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = dist  # ensure the conditional below passes
        maxdist = max(maxdist, defaultmax)
        if (dist > maxdist or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True
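
    # Worked example (editor's sketch, not in the original source): with
    # textlen = 1000 and default settings, a candidate delta is rejected if
    # the chain it extends spans more than max(_maxdeltachainspan, 4000)
    # bytes on disk, if the delta itself exceeds 1000 bytes, or if the
    # chain's compressed deltas sum to more than 2000 bytes.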

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache.
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude the already lazily-tested base, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
1806 if delta is not None:
1810 if delta is not None:
1807 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1811 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1808 else:
1812 else:
1809 rawtext = buildtext()
1813 rawtext = buildtext()
1810 data = self.compress(rawtext)
1814 data = self.compress(rawtext)
1811 l = len(data[1]) + len(data[0])
1815 l = len(data[1]) + len(data[0])
1812 base = chainbase = curr
1816 base = chainbase = curr
1813
1817
1814 e = (offset_type(offset, flags), l, textlen,
1818 e = (offset_type(offset, flags), l, textlen,
1815 base, link, p1r, p2r, node)
1819 base, link, p1r, p2r, node)
1816 self.index.insert(-1, e)
1820 self.index.insert(-1, e)
1817 self.nodemap[node] = curr
1821 self.nodemap[node] = curr
1818
1822
1819 entry = self._io.packentry(e, self.node, self.version, curr)
1823 entry = self._io.packentry(e, self.node, self.version, curr)
1820 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1824 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1821
1825
1822 if alwayscache and rawtext is None:
1826 if alwayscache and rawtext is None:
1823 rawtext = buildtext()
1827 rawtext = buildtext()
1824
1828
1825 if type(rawtext) == str: # only accept immutable objects
1829 if type(rawtext) == str: # only accept immutable objects
1826 self._cache = (node, curr, rawtext)
1830 self._cache = (node, curr, rawtext)
1827 self._chainbasecache[curr] = chainbase
1831 self._chainbasecache[curr] = chainbase
1828 return node
1832 return node
1829
1833
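    # A simplified sketch, not revlog API, of the candidate order walked by
    # _addrevision above: the cached delta from the wire first, then the
    # closest parent (or both parents when merge deltas are computed
    # aggressively), then the previous revision, falling back to a full
    # text. ``build`` and ``isgood`` are hypothetical stand-ins for
    # builddelta and _isgooddelta; note the real code also picks the
    # smallest of several good parent deltas rather than the first one.
    #
    #   def choosedelta(build, isgood, cached, parents, prev):
    #       tested = set()
    #       candidates = []
    #       if cached is not None:
    #           candidates.append(cached)
    #       candidates.extend(sorted(parents, reverse=True))
    #       candidates.append(prev)
    #       for rev in candidates:
    #           if rev in tested:
    #               continue
    #           tested.add(rev)
    #           delta = build(rev)
    #           if isgood(delta):
    #               return delta
    #       return None  # caller stores a fulltext instead
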
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

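    # A minimal standalone illustration of the append-mode workaround used
    # by _writeentry above; the file name is hypothetical. The point is the
    # explicit seek to EOF before writing on an 'a+' handle that may also
    # have been read from:
    #
    #   import os
    #
    #   fh = open('example.idx', 'ab+')
    #   fh.read()                  # the handle transitions to reading...
    #   fh.seek(0, os.SEEK_END)    # ...so reposition before the next write
    #   fh.write(b'new index entry')
    #   fh.close()
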
    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

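    # A hedged usage sketch for addgroup(). ``cg`` is assumed to be a
    # changegroup unbundler yielding delta chunks and ``cl`` a changelog
    # whose ``rev`` maps link nodes to revision numbers; neither name is
    # defined in this file.
    #
    #   added = []
    #   def onrevision(rl, node):
    #       # the revision was cached by _addrevision, so reading it back
    #       # here via rl.revision(node) would hit the fast path
    #       added.append(node)
    #   nodes = filelog.addgroup(cg, cl.rev, transaction,
    #                            addrevisioncb=onrevision)
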
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

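    # Both methods above are stubs meant to be overridden by subclasses
    # that support censorship. As a sketch (close to, but not guaranteed to
    # match, Mercurial's filelog), an override could answer straight from
    # the per-revision index flags:
    #
    #   def iscensored(self, rev):
    #       return self.flags(rev) & REVIDX_ISCENSORED
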
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

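    # A small worked example of getstrippoint(), with assumed (hypothetical)
    # numbers. Suppose a three-revision filelog where revs 1 and 2 are both
    # children of rev 0 and the linkrevs are [1, 5, 3] -- linkrev order can
    # diverge from rev order when branches were pulled in a different order:
    #
    #   rev, broken = rl.getstrippoint(4)
    #   # The walk pops rev 2 first (linkrev 3 < 4, so it is "broken": it
    #   # would be truncated even though its linkrev survives), then rev 1
    #   # (linkrev 5 >= 4, the last large linkrev). Result:
    #   # rev == 1, broken == {2}
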
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

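    # Hedged usage sketch: getstrippoint() only reports what would be
    # removed, while strip() performs the truncation. ``tr`` is assumed to
    # be an open transaction.
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   # the caller is expected to have backed up any revs in ``broken``
    #   # so they can be re-added after the truncation
    #   rl.strip(minlink, tr)
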
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = {'always', 'samerevs', 'never'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If unspecified, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
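
    # Hedged usage sketch for clone(): copying into a fresh, empty revlog
    # while forcing every delta to be recomputed. ``destrevlog`` and ``tr``
    # are assumed to be supplied by the caller; they are not defined here.
    #
    #   srclog.clone(tr, destrevlog,
    #                deltareuse=srclog.DELTAREUSENEVER,
    #                aggressivemergedeltas=True)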