revlog: avoid applying delta chain on cache hit...
Jun Wu - r31804:726f2412 default
@@ -1,2125 +1,2129 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    pycompat,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read) f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw) f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate the hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate the hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
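
# Illustration (hypothetical, nothing is registered here): an extension
# storing revision data externally could register a processor for
# REVIDX_EXTSTORED along these lines, with readexternal/writeexternal
# standing in for helpers it would define itself:
#
#   def _readext(self, rawtext):
#       return readexternal(rawtext), False  # "text" differs from "rawtext"
#   def _writeext(self, text):
#       return writeexternal(text), False    # hash over "text", per above
#   def _rawext(self, rawtext):
#       return False                         # rawtext alone can't verify hash
#   addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))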

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
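
# For example, given the flag definitions above:
#
#   offset_type(4096, REVIDX_ISCENSORED) == (4096 << 16) | 0x8000
#   getoffset(offset_type(4096, REVIDX_ISCENSORED)) == 4096
#   gettype(offset_type(4096, REVIDX_ISCENSORED)) == REVIDX_ISCENSORED
#
# i.e. the first index tuple entry packs a 48-bit data offset above 16 bits
# of per-revision flags.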

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
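
# Equivalently, for any pair of parents the digest above is:
#
#   hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
#
# The p2 == nullid branch is only a fast path: nullid is twenty zero bytes
# and therefore always sorts first, so _nullhash caches sha1(nullid).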

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"
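
# struct.calcsize(indexformatv0) == 76: four 4-byte big-endian ints plus
# three 20-byte nodeids, matching the field list above.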

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"
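
# struct.calcsize(indexformatng) == 64: an 8-byte offset/flags word, six
# 4-byte ints, a 20-byte nodeid and 12 pad bytes (the "32 bytes: nodeid"
# above leaves room for longer hashes).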

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
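
    # Illustration (hypothetical values): an opener carrying
    #   options = {'revlogv1': True, 'generaldelta': True,
    #              'chunkcachesize': 65536, 'compengine': 'zlib'}
    # requests a RevlogNG revlog with general delta; options lacking
    # 'revlogv1' force version 0, and no options at all leave
    # REVLOG_DEFAULT_VERSION. Either way, the header of an existing index
    # file read above overrides the requested version.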

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
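
    # Note that stop is inclusive when given and iteration runs backwards
    # when start > stop: revs(start=5, stop=2) yields 5, 4, 3, 2, while
    # plain revs() yields 0 through len(self) - 1.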

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)
    size = rawsize

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base
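
    # For reference, the layout of an index tuple e = self.index[rev], as
    # read by the accessors above and the chain walks below:
    #   e[0]: offset (upper 48 bits) and flags (lower 16 bits)
    #   e[1]: compressed length    e[2]: uncompressed length
    #   e[3]: delta base rev       e[4]: linkrev
    #   e[5], e[6]: parent revs    e[7]: nodeid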

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        return self.index[rev][5:7]

    def node(self, rev):
        return self.index[rev][7]

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
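
    # Sketch of the two layouts this walk handles: with general delta the
    # base pointer e[3] may reference any earlier rev, so a chain can look
    # like 7 -> 4 -> 2; without it deltas are always against rev - 1, so
    # the chain is the contiguous run from the base up to rev.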

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break
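
    # For example, with parent edges 0 <- 1 <- 2 and 0 <- 3,
    # descendants([0]) yields 1, 2, 3 in revision order, and
    # descendants([1]) yields only 2: a rev never counts as its own
    # descendant here.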

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
921
921
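    # Editor's illustration (not part of the original module): for a small
    # DAG 0 <- 1 <- 2 and 1 <- 3, heads(start=node(0)) walks the revs after
    # rev 0, evicting a parent from the head set as soon as a reachable
    # child of it appears:
    #
    #   init: reachable = {0}, heads = {0}
    #   r=1: p=0 reachable -> reachable={0,1}, heads={0,1}; 0 evicted -> {1}
    #   r=2: p=1 reachable -> reachable={0,1,2}, heads={1,2}; 1 evicted -> {2}
    #   r=3: p=1 reachable -> reachable={0,1,2,3}, heads={2,3}
    #
    # leaving revs 2 and 3, the two childless descendants of rev 0.
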
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                return partial
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

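    # Editor's illustration (not part of the original module): the cache is
    # a single (offset, data) window, so only a read that begins exactly
    # where the cached data ends can extend it (and only while the combined
    # size stays under _chunksize):
    #
    #   self._chunkcache = (4096, d)   # len(d) == 1024
    #   _addchunk(5120, d2)            # 4096 + 1024 == 5120 -> appended
    #   _addchunk(9000, d3)            # discontiguous -> cache replaced
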
    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

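    # Editor's illustration (not part of the original module): assuming a
    # 64KB chunk cache, a request for 100 bytes at offset 70000 is widened
    # to an aligned window before touching the OS:
    #
    #   cachesize  = 65536
    #   realoffset = 70000 & ~65535                            # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # so one aligned 64KB read at offset 65536 serves the request and
    # caches the surrounding bytes for neighboring (often backwards) reads.
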
    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getchunk(start, length, df=df)

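    # Editor's illustration (not part of the original module): in an inline
    # revlog the data chunks are interleaved with the fixed-size index
    # entries, so rev r's chunk is preceded by r + 1 index entries (entries
    # 0 through r) in addition to all earlier data. Assuming the RevlogNG
    # entry size of 64 bytes:
    #
    #   logical start of rev 2 = 300                 # self.start(2)
    #   physical start         = 300 + (2 + 1) * 64  # -> 492
    #
    # which is the ``start += (startrev + 1) * self._io.size`` above.
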
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._chunkraw(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        decomp = self.decompress
        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

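    # Editor's illustration (not part of the original module): for an
    # ascending run such as revs [5, 6, 7], a single _chunkraw(5, 7) call
    # fetches the covering byte span once, and each revision's chunk is
    # then sliced out of that buffer at chunkstart - offset, saving one
    # read (and one cache probe) per revision over calling _chunk() in a
    # loop.
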
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

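    # Editor's illustration (not part of the original module): with
    # generaldelta, index[rev][3] names the delta base directly, so a
    # revision can delta against any earlier revision (usually a parent);
    # without generaldelta the field only records the start of the chain
    # and the actual base is implicitly rev - 1. A base equal to rev itself
    # marks a full snapshot, hence the nullrev return above.
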
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written
        to revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

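    # Editor's note (not part of the original module): the cache handling
    # above gives revision() three increasingly cheap outcomes on a cache
    # hit, which is the point of this changeset:
    #
    #   1. raw=True            -> return the cached rawtext immediately
    #   2. flags == REVIDX_DEFAULT_FLAGS
    #                          -> no flag processor can alter the text, so
    #                             the cached rawtext is the text; return it
    #   3. otherwise           -> reuse the cached rawtext but still run
    #                             _processflags()
    #
    # Only a genuine cache miss walks a delta chain and applies it via
    # _chunks() and mdiff.patches().
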
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should
        be applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags
        defined in REVIDX_FLAGS_ORDER needs to be stable to allow
        non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update the result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else:  # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

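    # Editor's sketch (not part of the original module): a processor is a
    # (readtransform, writetransform, rawtransform) triple keyed by its flag
    # bit in _flagprocessors; in this era of the code a module-level
    # addflagprocessor() helper performs the registration. A minimal
    # processor that stores text base64-encoded might look like:
    #
    #   def b64read(rl, text):
    #       return text.decode('base64'), True    # (text, validatehash)
    #   def b64write(rl, text):
    #       return text.encode('base64'), False
    #   def b64raw(rl, text):
    #       return False                          # rawtext hash not valid
    #   addflagprocessor(REVIDX_EXTSTORED, (b64read, b64write, b64raw))
    #
    # REVIDX_EXTSTORED is reused here purely for illustration; a real
    # caller registers its own flag bit listed in REVIDX_FLAGS_ORDER.
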
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any
        # provided cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

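    # Editor's illustration (not part of the original module): the first
    # byte routes the chunk, mirroring what compress() emits:
    #
    #   decompress('x\x9c...')   # zlib-compressed payload, inflated inline
    #   decompress('\0<data>')   # stored verbatim, returned as-is
    #   decompress('u<data>')    # explicitly uncompressed, header stripped
    #   decompress(<other>)      # any other header byte (e.g. one claimed
    #                            # by an engine such as zstd) is resolved
    #                            # through util.compengines and memoized in
    #                            # self._decompressors
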
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we know
        to be performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
                (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

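    # Editor's illustration (not part of the original module): for a 10,000
    # byte fulltext, a candidate delta is accepted only if
    #
    #   dist               <= 40,000  (textlen * 4: disk span to read)
    #   l                  <= 10,000  (the delta is no larger than the
    #                                  fulltext it encodes)
    #   compresseddeltalen <= 20,000  (textlen * 2: total chain to apply)
    #   chainlen           <= self._maxchainlen, when one is configured
    #
    # Failing any bound makes the caller fall back to another base or to
    # storing a full snapshot.
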
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw
            # when they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude the already lazily tested base, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try a delta against prev to
                # hopefully save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str:  # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

1745 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1749 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1746 # Files opened in a+ mode have inconsistent behavior on various
1750 # Files opened in a+ mode have inconsistent behavior on various
1747 # platforms. Windows requires that a file positioning call be made
1751 # platforms. Windows requires that a file positioning call be made
1748 # when the file handle transitions between reads and writes. See
1752 # when the file handle transitions between reads and writes. See
1749 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1753 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1750 # platforms, Python or the platform itself can be buggy. Some versions
1754 # platforms, Python or the platform itself can be buggy. Some versions
1751 # of Solaris have been observed to not append at the end of the file
1755 # of Solaris have been observed to not append at the end of the file
1752 # if the file was seeked to before the end. See issue4943 for more.
1756 # if the file was seeked to before the end. See issue4943 for more.
1753 #
1757 #
1754 # We work around this issue by inserting a seek() before writing.
1758 # We work around this issue by inserting a seek() before writing.
1755 # Note: This is likely not necessary on Python 3.
1759 # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
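        # With a separate data file the index entry goes to .i and the
        # chunk to .d; inline revlogs interleave entries and chunks in .i,
        # and checkinlinesize() below may split them out once the file
        # grows large enough.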
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the rest
        are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
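            # iter(callable, sentinel) keeps calling cg.deltachunk(chain)
            # until it returns the sentinel {}; an empty chunk marks the
            # end of the group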
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

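                # Background (illustrative, not the exact mdiff internals):
                # a patch hunk starts with three big-endian int32s giving
                # (start, end, newlength), so a full replacement of the base
                # text is a single hunk shaped roughly like
                #   struct.pack(">lll", 0, oldlen, newlen) + newtext
                # which is the header mdiff.replacediffheader() encodes.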
                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, the delta must be a full
                    # replacement in a single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev
        ``minlink``

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
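        # Worked example (illustrative): suppose rev 1 has linkrev 3 but
        # precedes rev 2, which has linkrev 1, in the revlog, and minlink
        # is 2. We must truncate at rev 1, which also removes rev 2 even
        # though rev 2's own linkrev is below minlink; rev 2 is therefore
        # reported in brokenrevs so the caller can save and re-add it.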
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size
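        # For example, assuming the 64-byte RevlogNG index entry size and
        # rev == 10:
        #   separate data file: truncate .d at start(10) and .i at 10 * 64
        #   inline revlog:      truncate .i at start(10) + 10 * 64, since
        #                       index entries and data chunks are interleaved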

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
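        """check whether the index and data file sizes match the index contents

        Returns a (dd, di) pair giving the byte-count discrepancy of the
        data file and the index file respectively; (0, 0) means both files
        are consistent.
        """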
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = set(['always', 'samerevs', 'never'])

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If not set, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
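                # an index entry is the tuple
                #   (offset_type, compressed length, rawtext length,
                #    delta base rev, linkrev, p1 rev, p2 rev, node)
                # hence the positional accesses below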
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd