revlog: move censor logic into main revlog class...
Gregory Szorc
r37461:65250a66 default
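This commit moves the censorship machinery (the tombstone check in checkhash, iscensored, and the _peek_iscensored delta heuristic) from filelog into the revlog base class, gated on a new `censorable` constructor argument; filelog now merely opts in. A minimal sketch of the resulting shape (illustrative only, assuming the moved methods keep the behavior visible in filelog's removed code below):

    class revlog(object):
        def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                     mmaplargeindex=False, censorable=False):
            # ... unchanged initialization ...
            self._censorable = censorable   # filelog passes censorable=True

        def iscensored(self, rev):
            # Non-censorable revlogs (changelog, manifest) never report a
            # censored revision, whatever the index flags say.
            if not self._censorable:
                return False
            return self.flags(rev) & REVIDX_ISCENSORED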
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -1,124 +1,85 @@
 # filelog.py - file history class for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

-import struct
-
 from .thirdparty.zope import (
     interface as zi,
 )
 from . import (
-    error,
-    mdiff,
     repository,
     revlog,
 )

-def _censoredtext(text):
-    m, offs = revlog.parsemeta(text)
-    return m and "censored" in m
-
 @zi.implementer(repository.ifilestorage)
 class filelog(revlog.revlog):
     def __init__(self, opener, path):
         super(filelog, self).__init__(opener,
-                                      "/".join(("data", path + ".i")))
+                                      "/".join(("data", path + ".i")),
+                                      censorable=True)
         # full name of the user visible file, relative to the repository root
         self.filename = path

     def read(self, node):
         t = self.revision(node)
         if not t.startswith('\1\n'):
             return t
         s = t.index('\1\n', 2)
         return t[s + 2:]

     def add(self, text, meta, transaction, link, p1=None, p2=None):
         if meta or text.startswith('\1\n'):
             text = revlog.packmeta(meta, text)
         return self.addrevision(text, transaction, link, p1, p2)

     def renamed(self, node):
         if self.parents(node)[0] != revlog.nullid:
             return False
         t = self.revision(node)
         m = revlog.parsemeta(t)[0]
         if m and "copy" in m:
             return (m["copy"], revlog.bin(m["copyrev"]))
         return False

     def size(self, rev):
         """return the size of a given revision"""

         # for revisions with renames, we have to go the slow way
         node = self.node(rev)
         if self.renamed(node):
             return len(self.read(node))
         if self.iscensored(rev):
             return 0

         # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
         return super(filelog, self).size(rev)

     def cmp(self, node, text):
         """compare text with a given file revision

         returns True if text is different than what is stored.
         """

         t = text
         if text.startswith('\1\n'):
             t = '\1\n\1\n' + text

         samehashes = not super(filelog, self).cmp(node, t)
         if samehashes:
             return False

         # censored files compare against the empty file
         if self.iscensored(self.rev(node)):
             return text != ''

         # renaming a file produces a different hash, even if the data
         # remains unchanged. Check if it's the case (slow):
         if self.renamed(node):
             t2 = self.read(node)
             return t2 != text

         return True
-
-    def checkhash(self, text, node, p1=None, p2=None, rev=None):
-        try:
-            super(filelog, self).checkhash(text, node, p1=p1, p2=p2, rev=rev)
-        except error.RevlogError:
-            if _censoredtext(text):
-                raise error.CensoredNodeError(self.indexfile, node, text)
-            raise
-
-    def iscensored(self, rev):
-        """Check if a file revision is censored."""
-        return self.flags(rev) & revlog.REVIDX_ISCENSORED
-
-    def _peek_iscensored(self, baserev, delta, flush):
-        """Quickly check if a delta produces a censored revision."""
-        # Fragile heuristic: unless new file meta keys are added alphabetically
-        # preceding "censored", all censored revisions are prefixed by
-        # "\1\ncensored:". A delta producing such a censored revision must be a
-        # full-replacement delta, so we inspect the first and only patch in the
-        # delta for this prefix.
-        hlen = struct.calcsize(">lll")
-        if len(delta) <= hlen:
-            return False
-
-        oldlen = self.rawsize(baserev)
-        newlen = len(delta) - hlen
-        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
-            return False
-
-        add = "\1\ncensored:"
-        addlen = len(add)
-        return newlen >= addlen and delta[hlen:hlen + addlen] == add
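The removed _peek_iscensored leans on one property of mdiff.replacediffheader(oldlen, newlen): it is simply the header of a single bdiff hunk that replaces bytes 0..oldlen of the base, i.e. struct.pack(">lll", 0, oldlen, newlen). A self-contained sketch of the same heuristic under that assumption (not part of the change):

    import struct

    _HDR = struct.Struct(">lll")   # (start, end, newlength) of one bdiff hunk

    def peek_iscensored(oldlen, delta):
        # A censored revision travels as a full-replacement delta whose new
        # text begins with the "\1\ncensored:" metadata marker.
        hlen = _HDR.size
        if len(delta) <= hlen:
            return False
        newlen = len(delta) - hlen
        if _HDR.unpack(delta[:hlen]) != (0, oldlen, newlen):
            return False
        marker = '\1\ncensored:'
        return newlen >= len(marker) and delta[hlen:hlen + len(marker)] == marker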
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,2554 +1,2588 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """

 from __future__ import absolute_import

 import collections
 import contextlib
 import errno
 import hashlib
 import heapq
 import os
 import re
 import struct
 import zlib

 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullid,
     nullrev,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from .thirdparty import (
     attr,
 )
 from . import (
     ancestor,
     error,
     mdiff,
     policy,
     pycompat,
     templatefilters,
     util,
 )
 from .utils import (
     stringutil,
 )

 parsers = policy.importmod(r'parsers')

 # Aliased for performance.
 _zlibdecompress = zlib.decompress

 # revlog header flags
 REVLOGV0 = 0
 REVLOGV1 = 1
 # Dummy value until file format is finalized.
 # Reminder: change the bounds check in revlog.__init__ when this is changed.
 REVLOGV2 = 0xDEAD
 FLAG_INLINE_DATA = (1 << 16)
 FLAG_GENERALDELTA = (1 << 17)
 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
 REVLOG_DEFAULT_FORMAT = REVLOGV1
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
 REVLOGV2_FLAGS = REVLOGV1_FLAGS

 # revlog index flags
 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
 REVIDX_DEFAULT_FLAGS = 0
 # stable order in which flags need to be processed and their processors applied
 REVIDX_FLAGS_ORDER = [
     REVIDX_ISCENSORED,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
 ]
 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
 # bitmark for flags that could cause rawdata content change
 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576

 RevlogError = error.RevlogError
 LookupError = error.LookupError
 CensoredNodeError = error.CensoredNodeError
 ProgrammingError = error.ProgrammingError

 # Store flag processors (cf. 'addflagprocessor()' to register)
 _flagprocessors = {
     REVIDX_ISCENSORED: None,
 }

 _mdre = re.compile('\1\n')
 def parsemeta(text):
     """return (metadatadict, metadatasize)"""
     # text can be buffer, so we can't use .startswith or .index
     if text[:2] != '\1\n':
         return None, None
     s = _mdre.search(text, 2).start()
     mtext = text[2:s]
     meta = {}
     for l in mtext.splitlines():
         k, v = l.split(": ", 1)
         meta[k] = v
     return meta, (s + 2)

 def packmeta(meta, text):
     keys = sorted(meta)
     metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
     return "\1\n%s\1\n%s" % (metatext, text)

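For context, the "\1\n" metadata envelope built by packmeta() round-trips through parsemeta(); the size it returns points just past the closing marker:

    raw = packmeta({'copy': 'a.txt', 'copyrev': '0' * 40}, 'file content\n')
    meta, size = parsemeta(raw)
    assert meta == {'copy': 'a.txt', 'copyrev': '0' * 40}
    assert raw[size:] == 'file content\n'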
+def _censoredtext(text):
+    m, offs = parsemeta(text)
+    return m and "censored" in m
+
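_censoredtext() (moved here from filelog.py) recognizes the tombstone a censoring operation stores: packed metadata carrying a "censored" key, normally with an empty payload. Illustrative (the tombstone message is made up):

    tombstone = packmeta({'censored': 'removed for legal reasons'}, '')
    assert _censoredtext(tombstone)
    assert not _censoredtext('ordinary file content\n')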
 def addflagprocessor(flag, processor):
     """Register a flag processor on a revision data flag.

     Invariant:
     - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
       and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
     - Only one flag processor can be registered on a specific flag.
     - flagprocessors must be 3-tuples of functions (read, write, raw) with the
       following signatures:
           - (read)  f(self, rawtext) -> text, bool
           - (write) f(self, text) -> rawtext, bool
           - (raw)   f(self, rawtext) -> bool
       "text" is presented to the user. "rawtext" is stored in revlog data, not
       directly visible to the user.
       The boolean returned by these transforms is used to determine whether
       the returned text can be used for hash integrity checking. For example,
       if "write" returns False, then "text" is used to generate hash. If
       "write" returns True, that basically means "rawtext" returned by "write"
       should be used to generate hash. Usually, "write" and "read" return
       different booleans. And "raw" returns a same boolean as "write".

     Note: The 'raw' transform is used for changegroup generation and in some
     debug commands. In this case the transform only indicates whether the
     contents can be used for hash integrity checks.
     """
     if not flag & REVIDX_KNOWN_FLAGS:
         msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
         raise ProgrammingError(msg)
     if flag not in REVIDX_FLAGS_ORDER:
         msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
         raise ProgrammingError(msg)
     if flag in _flagprocessors:
         msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
         raise error.Abort(msg)
     _flagprocessors[flag] = processor

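A hypothetical registration, only to make the expected 3-tuple shape concrete. This degenerate identity processor leaves rawtext and text equal, so every transform can vouch for hash checking (real processors, such as LFS on REVIDX_EXTSTORED, return different booleans from "read" and "write"):

    def _readext(self, rawtext):
        return rawtext, True
    def _writeext(self, text):
        return text, True
    def _rawext(self, rawtext):
        return True
    # addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))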
 def getoffset(q):
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     if (type & ~REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError('unknown revlog index flags')
     return int(int(offset) << 16 | type)

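The packing is reversible, with the low 16 bits holding flags and the rest the offset:

    packed = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(packed) == 4096
    assert gettype(packed) == REVIDX_ISCENSORED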
 _nullhash = hashlib.sha1(nullid)

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent node is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = _nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         if p1 < p2:
             a = p1
             b = p2
         else:
             a = p2
             b = p1
         s = hashlib.sha1(a)
         s.update(b)
     s.update(text)
     return s.digest()

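Equivalently: a node id is the SHA-1 of the two parent nodes in sorted order followed by the text, so swapping parents never changes the hash (here `hash` is the module-level function above):

    import hashlib
    p1, p2 = '\x11' * 20, '\x22' * 20
    expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + 'data').digest()
    assert hash('data', p1, p2) == expected
    assert hash('data', p2, p1) == expected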
 def _trimchunk(revlog, revs, startidx, endidx=None):
     """returns revs[startidx:endidx] without empty trailing revs
     """
     length = revlog.length

     if endidx is None:
         endidx = len(revs)

     # Trim empty revs at the end, but never the very first revision of a chain
     while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
         endidx -= 1

     return revs[startidx:endidx]

 def _slicechunk(revlog, revs):
     """slice revs to reduce the amount of unrelated data to be read from disk.

     ``revs`` is sliced into groups that should be read in one time.
     Assume that revs are sorted.
     """
     start = revlog.start
     length = revlog.length

     if len(revs) <= 1:
         yield revs
         return

     startbyte = start(revs[0])
     endbyte = start(revs[-1]) + length(revs[-1])
     readdata = deltachainspan = endbyte - startbyte

     chainpayload = sum(length(r) for r in revs)

     if deltachainspan:
         density = chainpayload / float(deltachainspan)
     else:
         density = 1.0

     # Store the gaps in a heap to have them sorted by decreasing size
     gapsheap = []
     heapq.heapify(gapsheap)
     prevend = None
     for i, rev in enumerate(revs):
         revstart = start(rev)
         revlen = length(rev)

         # Skip empty revisions to form larger holes
         if revlen == 0:
             continue

         if prevend is not None:
             gapsize = revstart - prevend
             # only consider holes that are large enough
             if gapsize > revlog._srmingapsize:
                 heapq.heappush(gapsheap, (-gapsize, i))

         prevend = revstart + revlen

     # Collect the indices of the largest holes until the density is acceptable
     indicesheap = []
     heapq.heapify(indicesheap)
     while gapsheap and density < revlog._srdensitythreshold:
         oppgapsize, gapidx = heapq.heappop(gapsheap)

         heapq.heappush(indicesheap, gapidx)

         # the gap sizes are stored as negatives to be sorted decreasingly
         # by the heap
         readdata -= (-oppgapsize)
         if readdata > 0:
             density = chainpayload / float(readdata)
         else:
             density = 1.0

     # Cut the revs at collected indices
     previdx = 0
     while indicesheap:
         idx = heapq.heappop(indicesheap)

         chunk = _trimchunk(revlog, revs, previdx, idx)
         if chunk:
             yield chunk

         previdx = idx

     chunk = _trimchunk(revlog, revs, previdx)
     if chunk:
         yield chunk

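A toy illustration of the slicing, using a hypothetical stand-in object that exposes just the attributes _slicechunk() reads: two adjacent revisions followed by one 100 bytes away start at density 60/160 = 0.375, below the 0.5 threshold, so the big gap is skipped and the revs split into two reads:

    class fakerevlog(object):
        _srmingapsize = 10
        _srdensitythreshold = 0.5
        _chunks = [(0, 20), (20, 20), (140, 20)]   # (start, length) per rev
        def start(self, rev):
            return self._chunks[rev][0]
        def length(self, rev):
            return self._chunks[rev][1]

    assert list(_slicechunk(fakerevlog(), [0, 1, 2])) == [[0, 1], [2]]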
 @attr.s(slots=True, frozen=True)
 class _deltainfo(object):
     distance = attr.ib()
     deltalen = attr.ib()
     data = attr.ib()
     base = attr.ib()
     chainbase = attr.ib()
     chainlen = attr.ib()
     compresseddeltalen = attr.ib()

 class _deltacomputer(object):
     def __init__(self, revlog):
         self.revlog = revlog

     def _getcandidaterevs(self, p1, p2, cachedelta):
         """
         Provides revisions that present an interest to be diffed against,
         grouped by level of easiness.
         """
         revlog = self.revlog
         curr = len(revlog)
         prev = curr - 1
         p1r, p2r = revlog.rev(p1), revlog.rev(p2)

         # should we try to build a delta?
         if prev != nullrev and revlog.storedeltachains:
             tested = set()
             # This condition is true most of the time when processing
             # changegroup data into a generaldelta repo. The only time it
             # isn't true is if this is the first revision in a delta chain
             # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
             if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                 # Assume what we received from the server is a good choice
                 # build delta will reuse the cache
                 yield (cachedelta[0],)
                 tested.add(cachedelta[0])

             if revlog._generaldelta:
                 # exclude already lazy tested base if any
                 parents = [p for p in (p1r, p2r)
                            if p != nullrev and p not in tested]
                 if parents and not revlog._aggressivemergedeltas:
                     # Pick whichever parent is closer to us (to minimize the
                     # chance of having to build a fulltext).
                     parents = [max(parents)]
                 tested.update(parents)
                 yield parents

             if prev not in tested:
                 # other approach failed try against prev to hopefully save us a
                 # fulltext.
                 yield (prev,)

     def buildtext(self, revinfo, fh):
         """Builds a fulltext version of a revision

         revinfo: _revisioninfo instance that contains all needed info
         fh:      file handle to either the .i or the .d revlog file,
                  depending on whether it is inlined or not
         """
         btext = revinfo.btext
         if btext[0] is not None:
             return btext[0]

         revlog = self.revlog
         cachedelta = revinfo.cachedelta
         flags = revinfo.flags
         node = revinfo.node

         baserev = cachedelta[0]
         delta = cachedelta[1]
         # special case deltas which replace entire base; no need to decode
         # base revision. this neatly avoids censored bases, which throw when
         # they're decoded.
         hlen = struct.calcsize(">lll")
         if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                    len(delta) - hlen):
             btext[0] = delta[hlen:]
         else:
             # deltabase is rawtext before changed by flag processors, which is
             # equivalent to non-raw text
             basetext = revlog.revision(baserev, _df=fh, raw=False)
             btext[0] = mdiff.patch(basetext, delta)

         try:
             res = revlog._processflags(btext[0], flags, 'read', raw=True)
             btext[0], validatehash = res
             if validatehash:
                 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
             if flags & REVIDX_ISCENSORED:
                 raise RevlogError(_('node %s is not censored') % node)
         except CensoredNodeError:
             # must pass the censored index flag to add censored revisions
             if not flags & REVIDX_ISCENSORED:
                 raise
         return btext[0]

     def _builddeltadiff(self, base, revinfo, fh):
         revlog = self.revlog
         t = self.buildtext(revinfo, fh)
         if revlog.iscensored(base):
             # deltas based on a censored revision must replace the
             # full content in one patch, so delta works everywhere
             header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
             delta = header + t
         else:
             ptext = revlog.revision(base, _df=fh, raw=True)
             delta = mdiff.textdiff(ptext, t)

         return delta

     def _builddeltainfo(self, revinfo, base, fh):
         # can we use the cached delta?
         if revinfo.cachedelta and revinfo.cachedelta[0] == base:
             delta = revinfo.cachedelta[1]
         else:
             delta = self._builddeltadiff(base, revinfo, fh)
         revlog = self.revlog
         header, data = revlog.compress(delta)
         deltalen = len(header) + len(data)
         chainbase = revlog.chainbase(base)
         offset = revlog.end(len(revlog) - 1)
         dist = deltalen + offset - revlog.start(chainbase)
         if revlog._generaldelta:
             deltabase = base
         else:
             deltabase = chainbase
         chainlen, compresseddeltalen = revlog._chaininfo(base)
         chainlen += 1
         compresseddeltalen += deltalen
         return _deltainfo(dist, deltalen, (header, data), deltabase,
                           chainbase, chainlen, compresseddeltalen)

     def finddeltainfo(self, revinfo, fh):
         """Find an acceptable delta against a candidate revision

         revinfo: information about the revision (instance of _revisioninfo)
         fh:      file handle to either the .i or the .d revlog file,
                  depending on whether it is inlined or not

         Returns the first acceptable candidate revision, as ordered by
         _getcandidaterevs
         """
         cachedelta = revinfo.cachedelta
         p1 = revinfo.p1
         p2 = revinfo.p2
         revlog = self.revlog

         deltainfo = None
         for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
             nominateddeltas = []
             for candidaterev in candidaterevs:
                 # no delta for rawtext-changing revs (see "candelta" for why)
                 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                     continue
                 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                     nominateddeltas.append(candidatedelta)
             if nominateddeltas:
                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                 break

         return deltainfo

 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
     node:       expected hash of the revision
     p1, p2:     parent revs of the revision
     btext:      built text cache consisting of a one-element list
     cachedelta: (baserev, uncompressed_delta) or None
     flags:      flags associated to the revision storage

     One of btext[0] or cachedelta must be set.
     """
     node = attr.ib()
     p1 = attr.ib()
     p2 = attr.ib()
     btext = attr.ib()
     textlen = attr.ib()
     cachedelta = attr.ib()
     flags = attr.ib()

 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack

 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size

     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1

         # add the magic null revision at -1
         index.append((0, 0, 0, -1, -1, -1, -1, nullid))

         return index, nodemap, None

     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise RevlogError(_('index entry flags need revlog version 1'))
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return indexformatv0_pack(*e2)

 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack

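For the record, both index entry formats are fixed size, and in revision 0 the first 4 bytes of the entry are overlaid with the version header (see revlogio.packentry below):

    assert indexformatv0.size == 4 * 4 + 3 * 20   # 76 bytes per v0 entry
    assert indexformatng.size == 64               # 8 + 6*4 + 20 + 12 pad
    assert versionformat.size == 4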
527 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
531 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
528 # signed integer)
532 # signed integer)
529 _maxentrysize = 0x7fffffff
533 _maxentrysize = 0x7fffffff
530
534
531 class revlogio(object):
535 class revlogio(object):
532 def __init__(self):
536 def __init__(self):
533 self.size = indexformatng.size
537 self.size = indexformatng.size
534
538
535 def parseindex(self, data, inline):
539 def parseindex(self, data, inline):
536 # call the C implementation to parse the index data
540 # call the C implementation to parse the index data
537 index, cache = parsers.parse_index2(data, inline)
541 index, cache = parsers.parse_index2(data, inline)
538 return index, getattr(index, 'nodemap', None), cache
542 return index, getattr(index, 'nodemap', None), cache
539
543
540 def packentry(self, entry, node, version, rev):
544 def packentry(self, entry, node, version, rev):
541 p = indexformatng_pack(*entry)
545 p = indexformatng_pack(*entry)
542 if rev == 0:
546 if rev == 0:
543 p = versionformat_pack(version) + p[4:]
547 p = versionformat_pack(version) + p[4:]
544 return p
548 return p
545
549
546 class revlog(object):
550 class revlog(object):
547 """
551 """
548 the underlying revision storage object
552 the underlying revision storage object
549
553
550 A revlog consists of two parts, an index and the revision data.
554 A revlog consists of two parts, an index and the revision data.
551
555
552 The index is a file with a fixed record size containing
556 The index is a file with a fixed record size containing
553 information on each revision, including its nodeid (hash), the
557 information on each revision, including its nodeid (hash), the
554 nodeids of its parents, the position and offset of its data within
558 nodeids of its parents, the position and offset of its data within
555 the data file, and the revision it's based on. Finally, each entry
559 the data file, and the revision it's based on. Finally, each entry
556 contains a linkrev entry that can serve as a pointer to external
560 contains a linkrev entry that can serve as a pointer to external
557 data.
561 data.
558
562
559 The revision data itself is a linear collection of data chunks.
563 The revision data itself is a linear collection of data chunks.
560 Each chunk represents a revision and is usually represented as a
564 Each chunk represents a revision and is usually represented as a
561 delta against the previous chunk. To bound lookup time, runs of
565 delta against the previous chunk. To bound lookup time, runs of
562 deltas are limited to about 2 times the length of the original
566 deltas are limited to about 2 times the length of the original
563 version data. This makes retrieval of a version proportional to
567 version data. This makes retrieval of a version proportional to
564 its size, or O(1) relative to the number of revisions.
568 its size, or O(1) relative to the number of revisions.
565
569
566 Both pieces of the revlog are written to in an append-only
570 Both pieces of the revlog are written to in an append-only
567 fashion, which means we never need to rewrite a file to insert or
571 fashion, which means we never need to rewrite a file to insert or
568 remove data, and can use some simple techniques to avoid the need
572 remove data, and can use some simple techniques to avoid the need
569 for locking while reading.
573 for locking while reading.
570
574
571 If checkambig, indexfile is opened with checkambig=True at
575 If checkambig, indexfile is opened with checkambig=True at
572 writing, to avoid file stat ambiguity.
576 writing, to avoid file stat ambiguity.
573
577
574 If mmaplargeindex is True, and an mmapindexthreshold is set, the
578 If mmaplargeindex is True, and an mmapindexthreshold is set, the
575 index will be mmapped rather than read if it is larger than the
579 index will be mmapped rather than read if it is larger than the
576 configured threshold.
580 configured threshold.
581
582 If censorable is True, the revlog can have censored revisions.
577 """
583 """
578 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
584 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
579 mmaplargeindex=False):
585 mmaplargeindex=False, censorable=False):
580 """
586 """
581 create a revlog object
587 create a revlog object
582
588
583 opener is a function that abstracts the file opening operation
589 opener is a function that abstracts the file opening operation
584 and can be used to implement COW semantics or the like.
590 and can be used to implement COW semantics or the like.
585 """
591 """
586 self.indexfile = indexfile
592 self.indexfile = indexfile
587 self.datafile = datafile or (indexfile[:-2] + ".d")
593 self.datafile = datafile or (indexfile[:-2] + ".d")
588 self.opener = opener
594 self.opener = opener
589 # When True, indexfile is opened with checkambig=True at writing, to
595 # When True, indexfile is opened with checkambig=True at writing, to
590 # avoid file stat ambiguity.
596 # avoid file stat ambiguity.
591 self._checkambig = checkambig
597 self._checkambig = checkambig
598 self._censorable = censorable
592 # 3-tuple of (node, rev, text) for a raw revision.
599 # 3-tuple of (node, rev, text) for a raw revision.
593 self._cache = None
600 self._cache = None
594 # Maps rev to chain base rev.
601 # Maps rev to chain base rev.
595 self._chainbasecache = util.lrucachedict(100)
602 self._chainbasecache = util.lrucachedict(100)
596 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
603 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
597 self._chunkcache = (0, '')
604 self._chunkcache = (0, '')
598 # How much data to read and cache into the raw revlog data cache.
605 # How much data to read and cache into the raw revlog data cache.
599 self._chunkcachesize = 65536
606 self._chunkcachesize = 65536
600 self._maxchainlen = None
607 self._maxchainlen = None
601 self._aggressivemergedeltas = False
608 self._aggressivemergedeltas = False
602 self.index = []
609 self.index = []
603 # Mapping of partial identifiers to full nodes.
610 # Mapping of partial identifiers to full nodes.
604 self._pcache = {}
611 self._pcache = {}
605 # Mapping of revision integer to full node.
612 # Mapping of revision integer to full node.
606 self._nodecache = {nullid: nullrev}
613 self._nodecache = {nullid: nullrev}
607 self._nodepos = None
614 self._nodepos = None
608 self._compengine = 'zlib'
615 self._compengine = 'zlib'
609 self._maxdeltachainspan = -1
616 self._maxdeltachainspan = -1
610 self._withsparseread = False
617 self._withsparseread = False
611 self._srdensitythreshold = 0.25
618 self._srdensitythreshold = 0.25
612 self._srmingapsize = 262144
619 self._srmingapsize = 262144
613
620
614 mmapindexthreshold = None
621 mmapindexthreshold = None
615 v = REVLOG_DEFAULT_VERSION
622 v = REVLOG_DEFAULT_VERSION
616 opts = getattr(opener, 'options', None)
623 opts = getattr(opener, 'options', None)
617 if opts is not None:
624 if opts is not None:
618 if 'revlogv2' in opts:
625 if 'revlogv2' in opts:
619 # version 2 revlogs always use generaldelta.
626 # version 2 revlogs always use generaldelta.
620 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
627 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
621 elif 'revlogv1' in opts:
628 elif 'revlogv1' in opts:
622 if 'generaldelta' in opts:
629 if 'generaldelta' in opts:
623 v |= FLAG_GENERALDELTA
630 v |= FLAG_GENERALDELTA
624 else:
631 else:
625 v = 0
632 v = 0
626 if 'chunkcachesize' in opts:
633 if 'chunkcachesize' in opts:
627 self._chunkcachesize = opts['chunkcachesize']
634 self._chunkcachesize = opts['chunkcachesize']
628 if 'maxchainlen' in opts:
635 if 'maxchainlen' in opts:
629 self._maxchainlen = opts['maxchainlen']
636 self._maxchainlen = opts['maxchainlen']
630 if 'aggressivemergedeltas' in opts:
637 if 'aggressivemergedeltas' in opts:
631 self._aggressivemergedeltas = opts['aggressivemergedeltas']
638 self._aggressivemergedeltas = opts['aggressivemergedeltas']
632 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
639 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
633 if 'compengine' in opts:
640 if 'compengine' in opts:
634 self._compengine = opts['compengine']
641 self._compengine = opts['compengine']
635 if 'maxdeltachainspan' in opts:
642 if 'maxdeltachainspan' in opts:
636 self._maxdeltachainspan = opts['maxdeltachainspan']
643 self._maxdeltachainspan = opts['maxdeltachainspan']
637 if mmaplargeindex and 'mmapindexthreshold' in opts:
644 if mmaplargeindex and 'mmapindexthreshold' in opts:
638 mmapindexthreshold = opts['mmapindexthreshold']
645 mmapindexthreshold = opts['mmapindexthreshold']
639 self._withsparseread = bool(opts.get('with-sparse-read', False))
646 self._withsparseread = bool(opts.get('with-sparse-read', False))
640 if 'sparse-read-density-threshold' in opts:
647 if 'sparse-read-density-threshold' in opts:
641 self._srdensitythreshold = opts['sparse-read-density-threshold']
648 self._srdensitythreshold = opts['sparse-read-density-threshold']
642 if 'sparse-read-min-gap-size' in opts:
649 if 'sparse-read-min-gap-size' in opts:
643 self._srmingapsize = opts['sparse-read-min-gap-size']
650 self._srmingapsize = opts['sparse-read-min-gap-size']
644
651
645 if self._chunkcachesize <= 0:
652 if self._chunkcachesize <= 0:
646 raise RevlogError(_('revlog chunk cache size %r is not greater '
653 raise RevlogError(_('revlog chunk cache size %r is not greater '
647 'than 0') % self._chunkcachesize)
654 'than 0') % self._chunkcachesize)
648 elif self._chunkcachesize & (self._chunkcachesize - 1):
655 elif self._chunkcachesize & (self._chunkcachesize - 1):
649 raise RevlogError(_('revlog chunk cache size %r is not a power '
656 raise RevlogError(_('revlog chunk cache size %r is not a power '
650 'of 2') % self._chunkcachesize)
657 'of 2') % self._chunkcachesize)
651
658
652 indexdata = ''
659 indexdata = ''
653 self._initempty = True
660 self._initempty = True
654 try:
661 try:
655 with self._indexfp() as f:
662 with self._indexfp() as f:
656 if (mmapindexthreshold is not None and
663 if (mmapindexthreshold is not None and
657 self.opener.fstat(f).st_size >= mmapindexthreshold):
664 self.opener.fstat(f).st_size >= mmapindexthreshold):
658 indexdata = util.buffer(util.mmapread(f))
665 indexdata = util.buffer(util.mmapread(f))
659 else:
666 else:
660 indexdata = f.read()
667 indexdata = f.read()
661 if len(indexdata) > 0:
668 if len(indexdata) > 0:
662 v = versionformat_unpack(indexdata[:4])[0]
669 v = versionformat_unpack(indexdata[:4])[0]
663 self._initempty = False
670 self._initempty = False
664 except IOError as inst:
671 except IOError as inst:
665 if inst.errno != errno.ENOENT:
672 if inst.errno != errno.ENOENT:
666 raise
673 raise
667
674
668 self.version = v
675 self.version = v
669 self._inline = v & FLAG_INLINE_DATA
676 self._inline = v & FLAG_INLINE_DATA
670 self._generaldelta = v & FLAG_GENERALDELTA
677 self._generaldelta = v & FLAG_GENERALDELTA
671 flags = v & ~0xFFFF
678 flags = v & ~0xFFFF
672 fmt = v & 0xFFFF
679 fmt = v & 0xFFFF
673 if fmt == REVLOGV0:
680 if fmt == REVLOGV0:
674 if flags:
681 if flags:
675 raise RevlogError(_('unknown flags (%#04x) in version %d '
682 raise RevlogError(_('unknown flags (%#04x) in version %d '
676 'revlog %s') %
683 'revlog %s') %
677 (flags >> 16, fmt, self.indexfile))
684 (flags >> 16, fmt, self.indexfile))
678 elif fmt == REVLOGV1:
685 elif fmt == REVLOGV1:
679 if flags & ~REVLOGV1_FLAGS:
686 if flags & ~REVLOGV1_FLAGS:
680 raise RevlogError(_('unknown flags (%#04x) in version %d '
687 raise RevlogError(_('unknown flags (%#04x) in version %d '
681 'revlog %s') %
688 'revlog %s') %
682 (flags >> 16, fmt, self.indexfile))
689 (flags >> 16, fmt, self.indexfile))
683 elif fmt == REVLOGV2:
690 elif fmt == REVLOGV2:
684 if flags & ~REVLOGV2_FLAGS:
691 if flags & ~REVLOGV2_FLAGS:
685 raise RevlogError(_('unknown flags (%#04x) in version %d '
692 raise RevlogError(_('unknown flags (%#04x) in version %d '
686 'revlog %s') %
693 'revlog %s') %
687 (flags >> 16, fmt, self.indexfile))
694 (flags >> 16, fmt, self.indexfile))
688 else:
695 else:
689 raise RevlogError(_('unknown version (%d) in revlog %s') %
696 raise RevlogError(_('unknown version (%d) in revlog %s') %
690 (fmt, self.indexfile))
697 (fmt, self.indexfile))
691
698
692 self.storedeltachains = True
699 self.storedeltachains = True
693
700
694 self._io = revlogio()
701 self._io = revlogio()
695 if self.version == REVLOGV0:
702 if self.version == REVLOGV0:
696 self._io = revlogoldio()
703 self._io = revlogoldio()
697 try:
704 try:
698 d = self._io.parseindex(indexdata, self._inline)
705 d = self._io.parseindex(indexdata, self._inline)
699 except (ValueError, IndexError):
706 except (ValueError, IndexError):
700 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
707 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
701 self.index, nodemap, self._chunkcache = d
708 self.index, nodemap, self._chunkcache = d
702 if nodemap is not None:
709 if nodemap is not None:
703 self.nodemap = self._nodecache = nodemap
710 self.nodemap = self._nodecache = nodemap
704 if not self._chunkcache:
711 if not self._chunkcache:
705 self._chunkclear()
712 self._chunkclear()
706 # revnum -> (chain-length, sum-delta-length)
713 # revnum -> (chain-length, sum-delta-length)
707 self._chaininfocache = {}
714 self._chaininfocache = {}
708 # revlog header -> revlog compressor
715 # revlog header -> revlog compressor
709 self._decompressors = {}
716 self._decompressors = {}
710
717
711 @util.propertycache
718 @util.propertycache
712 def _compressor(self):
719 def _compressor(self):
713 return util.compengines[self._compengine].revlogcompressor()
720 return util.compengines[self._compengine].revlogcompressor()
714
721
715 def _indexfp(self, mode='r'):
722 def _indexfp(self, mode='r'):
716 """file object for the revlog's index file"""
723 """file object for the revlog's index file"""
717 args = {r'mode': mode}
724 args = {r'mode': mode}
718 if mode != 'r':
725 if mode != 'r':
719 args[r'checkambig'] = self._checkambig
726 args[r'checkambig'] = self._checkambig
720 if mode == 'w':
727 if mode == 'w':
721 args[r'atomictemp'] = True
728 args[r'atomictemp'] = True
722 return self.opener(self.indexfile, **args)
729 return self.opener(self.indexfile, **args)
723
730
724 def _datafp(self, mode='r'):
731 def _datafp(self, mode='r'):
725 """file object for the revlog's data file"""
732 """file object for the revlog's data file"""
726 return self.opener(self.datafile, mode=mode)
733 return self.opener(self.datafile, mode=mode)
727
734
728 @contextlib.contextmanager
735 @contextlib.contextmanager
729 def _datareadfp(self, existingfp=None):
736 def _datareadfp(self, existingfp=None):
730 """file object suitable to read data"""
737 """file object suitable to read data"""
731 if existingfp is not None:
738 if existingfp is not None:
732 yield existingfp
739 yield existingfp
733 else:
740 else:
734 if self._inline:
741 if self._inline:
735 func = self._indexfp
742 func = self._indexfp
736 else:
743 else:
737 func = self._datafp
744 func = self._datafp
738 with func() as fp:
745 with func() as fp:
739 yield fp
746 yield fp
740
747
741 def tip(self):
748 def tip(self):
742 return self.node(len(self.index) - 2)
749 return self.node(len(self.index) - 2)
743 def __contains__(self, rev):
750 def __contains__(self, rev):
744 return 0 <= rev < len(self)
751 return 0 <= rev < len(self)
745 def __len__(self):
752 def __len__(self):
746 return len(self.index) - 1
753 return len(self.index) - 1
747 def __iter__(self):
754 def __iter__(self):
748 return iter(xrange(len(self)))
755 return iter(xrange(len(self)))
749 def revs(self, start=0, stop=None):
756 def revs(self, start=0, stop=None):
750 """iterate over all rev in this revlog (from start to stop)"""
757 """iterate over all rev in this revlog (from start to stop)"""
751 step = 1
758 step = 1
752 if stop is not None:
759 if stop is not None:
753 if start > stop:
760 if start > stop:
754 step = -1
761 step = -1
755 stop += step
762 stop += step
756 else:
763 else:
757 stop = len(self)
764 stop = len(self)
758 return xrange(start, stop, step)
765 return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
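    # The pure-python fallback above scans the index backwards from the last
    # probed position, memoizing every node it passes in _nodecache, so
    # repeated lookups get cheaper as the cache fills in.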

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
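    # In other words, the first field of e = self.index[rev] packs
    # (offset << 16) | flags, so the byte offset is e[0] >> 16 and the
    # storage flags are e[0] & 0xFFFF, as start() and flags() below show.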
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base
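    # chainbase() follows each entry's delta-base pointer (index[rev][3])
    # until it reaches an entry whose base is itself; that self-reference
    # marks a full snapshot, the base of the whole delta chain.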

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
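    # The while/else above lets the walk stop early: hitting a rev whose
    # chain info is already cached adds the cached totals and breaks, while
    # only a walk that runs all the way to the snapshot base (no break)
    # falls into the else branch to count the base text itself.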

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
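    # Sketch of the two layouts this walk handles: with generaldelta each
    # entry's base pointer names its delta parent directly, so the chain
    # can hop to any earlier rev; without it the delta parent is implicitly
    # rev - 1, so the chain is the contiguous run back to the last snapshot.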

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
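    # ``has`` behaves like the inclusive set ::common: membership tests fall
    # through to the lazy ancestor generator, so the full ancestor set is
    # never materialized unless the walk actually needs it.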

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest
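    # Probing sketch: starting from max(6, minlength) characters, the loop
    # shrinks the prefix while it stays unambiguous and grows it after a
    # collision; the last prefix isvalid() accepted wins, never shorter
    # than minlength.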

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data
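    # A segment that starts exactly where the cached data ends extends the
    # cache in place (up to _chunksize); anything non-contiguous replaces
    # it, since the cache only ever holds a single window.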

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and its
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
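    # Worked example of the window arithmetic, assuming a 64 KiB cache
    # window (cachesize = 65536): for offset=70000, length=100,
    #   realoffset = 70000 & ~65535 = 65536
    #   reallength = ((70100 + 65536) & ~65535) - 65536 = 65536
    # so one full aligned window around the request is read and cached.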

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
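    # The inline adjustment above reflects how inline revlogs interleave
    # index entries with revision data in a single file: entries 0..rev
    # inclusive precede rev's data, hence the extra
    # (rev + 1) * self._io.size bytes.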

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
1697
1704
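    # Illustrative sketch (names are hypothetical, not from this change): this
    # is roughly how revision() below consumes _chunks(). The whole delta
    # chain is fetched in one batch, then the base text is patched forward:
    #
    #   chain, stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)
    #   text = mdiff.patches(bytes(bins[0]), bins[1:])
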
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

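    # Hedged sketch of deriving a full delta chain from deltaparent() ('rl'
    # and 'rev' are illustrative; the real logic lives in _deltachain and
    # also handles cache stops):
    #
    #   chain = []
    #   while rev != nullrev:
    #       chain.append(rev)
    #       rev = rl.deltaparent(rev)
    #   chain.reverse() # base (full) revision first
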
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

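    # Usage sketch (illustrative, assuming a revlog instance 'rl'): the same
    # revision read two ways. raw=True skips read-side flag transforms and is
    # what changegroup generation wants:
    #
    #   text = rl.revision(node)               # processed fulltext
    #   rawtext = rl.revision(node, raw=True)  # stored rawtext
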
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

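    # Hedged sketch of a flag processor as consumed above: an entry in
    # _flagprocessors is a (readtransform, writetransform, rawtransform)
    # tuple. All names below are illustrative only:
    #
    #   def readtf(rl, text):
    #       return decode(text), True  # (new text, validatehash)
    #   def writetf(rl, text):
    #       return encode(text), True
    #   def rawtf(rl, text):
    #       return False  # rawtext hash cannot be validated directly
    #   _flagprocessors[SOMEFLAG] = (readtf, writetf, rawtf)
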
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

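    # Note on the censor test above: _censoredtext() inspects the rawtext's
    # "\1\n"-delimited metadata block for a "censored" key, so a hash failure
    # on a censorable revlog is reported as CensoredNodeError rather than a
    # generic integrity error when the stored text is a censor tombstone.
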
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

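    # Layout note with a worked example (numbers illustrative): in an inline
    # revlog, each revision's data immediately follows its index entry, so the
    # on-disk offset of rev r's chunk is start(r) + (r + 1) * iosize, as used
    # in _chunks() above. With a 64-byte index entry (iosize == 64) and rev 2
    # starting at data offset 100, the chunk begins at byte
    # 100 + 3 * 64 == 292 of the .i file.
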
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

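    # Worked examples of the one-byte header convention shared by compress()
    # and decompress() (return shapes from compress() above; data values are
    # illustrative):
    #
    #   ('', compressed)  # compressor emitted its own header, e.g. zlib 'x'
    #   ('u', data)       # stored verbatim; 'u' is stripped on read
    #   ('', data)        # data starting with '\0' needs no marker
    #   ('', '')          # the empty chunk stays empty
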
    def _isgooddeltainfo(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'd.distance' is the distance from the base revision -- bounding it
        #   limits the amount of I/O we need to do.
        # - 'd.compresseddeltalen' is the sum of the total size of deltas we
        #   need to apply -- bounding it limits the amount of CPU we consume.

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = d.distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (d.distance > maxdist or d.deltalen > textlen or
            d.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and d.chainlen > self._maxchainlen)):
            return False

        return True

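    # Worked example of the bounds above (numbers illustrative): for a
    # revision with textlen == 1000 and default settings, a delta is rejected
    # if it spans more than max(maxdeltachainspan, 4000) bytes on disk, if the
    # delta itself exceeds 1000 bytes, if the summed deltas to apply exceed
    # 2000 bytes, or if the chain is longer than the configured maxchainlen.
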
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

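    # Sketch of the per-delta tuple consumed above (field order from the
    # unpacking in the loop; values are illustrative):
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where delta is a binary patch against deltabase and linknode is mapped
    # to a local linkrev via linkmapper.
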
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

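    # Worked sketch of the full-replacement delta inspected above: a binary
    # delta is a sequence of (start, end, newlength) patch headers packed as
    # ">lll", each followed by replacement data. A censor replacement rewrites
    # the entire old text in one patch, so it begins with (lengths
    # illustrative):
    #
    #   struct.pack(">lll", 0, oldlen, newlen) + "\1\ncensored:..."
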
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If not set, the destination revlog's existing setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
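
    # Usage sketch (hypothetical caller with illustrative names): recompute
    # every delta while copying into a fresh revlog, e.g. during a format
    # upgrade:
    #
    #   dest = revlog(opener, 'data/foo.i.new')
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)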