revlog: move check for wdir from changelog to revlog...
Martin von Zweigbergk
r34029:bfb38c5c default
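Context for the diff below: the changelog.py side deletes a three-line guard from changelog._addrevision that rejected the working-directory sentinel node, and per the commit message the same check now lives in revlog._addrevision (that revlog hunk falls outside the excerpt shown here, which is why the revlog.py portion below contains only context lines). A minimal self-contained sketch of the relocated guard, with wdirid rebuilt from its definition in mercurial/node.py and the error type simplified to ValueError so the snippet runs outside Mercurial:

    from binascii import unhexlify

    nullid = b"\x00" * 20           # node of the null revision
    wdirid = unhexlify(b"ff" * 20)  # 20 bytes of 0xff: the working-directory sentinel

    def checkaddable(node, indexfile="00changelog.i"):
        # Refuse the wdir sentinel: it must never be stored in a revlog.
        # (nullid is rejected by a separate check in the real code.)
        if node == wdirid:
            raise ValueError("%s: attempt to add wdir revision" % indexfile)
        return node

    try:
        checkaddable(wdirid)
    except ValueError as e:
        print(e)  # -> 00changelog.i: attempt to add wdir revision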
@@ -1,547 +1,543 @@
 # changelog.py - changelog class for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 import collections

 from .i18n import _
 from .node import (
     bin,
     hex,
     nullid,
-    wdirid,
 )

 from . import (
     encoding,
     error,
     revlog,
     util,
 )

 _defaultextra = {'branch': 'default'}

 def _string_escape(text):
     """
     >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
     >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
     >>> s
     'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
     >>> res = _string_escape(s)
     >>> s == util.unescapestr(res)
     True
     """
     # subset of the string_escape codec
     text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
     return text.replace('\0', '\\0')

 def decodeextra(text):
     """
     >>> sorted(decodeextra(encodeextra({'foo': 'bar', 'baz': chr(0) + '2'})
     ...                    ).iteritems())
     [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
     >>> sorted(decodeextra(encodeextra({'foo': 'bar',
     ...                                 'baz': chr(92) + chr(0) + '2'})
     ...                    ).iteritems())
     [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
     """
     extra = _defaultextra.copy()
     for l in text.split('\0'):
         if l:
             if '\\0' in l:
                 # fix up \0 without getting into trouble with \\0
                 l = l.replace('\\\\', '\\\\\n')
                 l = l.replace('\\0', '\0')
                 l = l.replace('\n', '')
             k, v = util.unescapestr(l).split(':', 1)
             extra[k] = v
     return extra

 def encodeextra(d):
     # keys must be sorted to produce a deterministic changelog entry
     items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
     return "\0".join(items)

 def stripdesc(desc):
     """strip trailing whitespace and leading and trailing empty lines"""
     return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')

 class appender(object):
     '''the changelog index must be updated last on disk, so we use this class
     to delay writes to it'''
     def __init__(self, vfs, name, mode, buf):
         self.data = buf
         fp = vfs(name, mode)
         self.fp = fp
         self.offset = fp.tell()
         self.size = vfs.fstat(fp).st_size
         self._end = self.size

     def end(self):
         return self._end
     def tell(self):
         return self.offset
     def flush(self):
         pass
     def close(self):
         self.fp.close()

     def seek(self, offset, whence=0):
         '''virtual file offset spans real file and data'''
         if whence == 0:
             self.offset = offset
         elif whence == 1:
             self.offset += offset
         elif whence == 2:
             self.offset = self.end() + offset
         if self.offset < self.size:
             self.fp.seek(self.offset)

     def read(self, count=-1):
         '''only trick here is reads that span real file and data'''
         ret = ""
         if self.offset < self.size:
             s = self.fp.read(count)
             ret = s
             self.offset += len(s)
             if count > 0:
                 count -= len(s)
         if count != 0:
             doff = self.offset - self.size
             self.data.insert(0, "".join(self.data))
             del self.data[1:]
             s = self.data[0][doff:doff + count]
             self.offset += len(s)
             ret += s
         return ret

     def write(self, s):
         self.data.append(bytes(s))
         self.offset += len(s)
         self._end += len(s)

 def _divertopener(opener, target):
     """build an opener that writes in 'target.a' instead of 'target'"""
     def _divert(name, mode='r', checkambig=False):
         if name != target:
             return opener(name, mode)
         return opener(name + ".a", mode)
     return _divert

 def _delayopener(opener, target, buf):
     """build an opener that stores chunks in 'buf' instead of 'target'"""
     def _delay(name, mode='r', checkambig=False):
         if name != target:
             return opener(name, mode)
         return appender(opener, name, mode, buf)
     return _delay

 _changelogrevision = collections.namedtuple(u'changelogrevision',
                                             (u'manifest', u'user', u'date',
                                              u'files', u'description',
                                              u'extra'))

 class changelogrevision(object):
     """Holds results of a parsed changelog revision.

     Changelog revisions consist of multiple pieces of data, including
     the manifest node, user, and date. This object exposes a view into
     the parsed object.
     """

     __slots__ = (
         u'_offsets',
         u'_text',
     )

     def __new__(cls, text):
         if not text:
             return _changelogrevision(
                 manifest=nullid,
                 user='',
                 date=(0, 0),
                 files=[],
                 description='',
                 extra=_defaultextra,
             )

         self = super(changelogrevision, cls).__new__(cls)
         # We could return here and implement the following as an __init__.
         # But doing it here is equivalent and saves an extra function call.

         # format used:
         # nodeid\n        : manifest node in ascii
         # user\n          : user, no \n or \r allowed
         # time tz extra\n : date (time is int or float, timezone is int)
         #                 : extra is metadata, encoded and separated by '\0'
         #                 : older versions ignore it
         # files\n\n       : files modified by the cset, no \n or \r allowed
         # (.*)            : comment (free text, ideally utf-8)
         #
         # changelog v0 doesn't use extra

         nl1 = text.index('\n')
         nl2 = text.index('\n', nl1 + 1)
         nl3 = text.index('\n', nl2 + 1)

         # The list of files may be empty. Which means nl3 is the first of the
         # double newline that precedes the description.
         if text[nl3 + 1:nl3 + 2] == '\n':
             doublenl = nl3
         else:
             doublenl = text.index('\n\n', nl3 + 1)

         self._offsets = (nl1, nl2, nl3, doublenl)
         self._text = text

         return self

     @property
     def manifest(self):
         return bin(self._text[0:self._offsets[0]])

     @property
     def user(self):
         off = self._offsets
         return encoding.tolocal(self._text[off[0] + 1:off[1]])

     @property
     def _rawdate(self):
         off = self._offsets
         dateextra = self._text[off[1] + 1:off[2]]
         return dateextra.split(' ', 2)[0:2]

     @property
     def _rawextra(self):
         off = self._offsets
         dateextra = self._text[off[1] + 1:off[2]]
         fields = dateextra.split(' ', 2)
         if len(fields) != 3:
             return None

         return fields[2]

     @property
     def date(self):
         raw = self._rawdate
         time = float(raw[0])
         # Various tools did silly things with the timezone.
         try:
             timezone = int(raw[1])
         except ValueError:
             timezone = 0

         return time, timezone

     @property
     def extra(self):
         raw = self._rawextra
         if raw is None:
             return _defaultextra

         return decodeextra(raw)

     @property
     def files(self):
         off = self._offsets
         if off[2] == off[3]:
             return []

         return self._text[off[2] + 1:off[3]].split('\n')

     @property
     def description(self):
         return encoding.tolocal(self._text[self._offsets[3] + 2:])

 class changelog(revlog.revlog):
     def __init__(self, opener, trypending=False):
         """Load a changelog revlog using an opener.

         If ``trypending`` is true, we attempt to load the index from a
         ``00changelog.i.a`` file instead of the default ``00changelog.i``.
         The ``00changelog.i.a`` file contains index (and possibly inline
         revision) data for a transaction that hasn't been finalized yet.
         It exists in a separate file to facilitate readers (such as
         hooks processes) accessing data before a transaction is finalized.
         """
         if trypending and opener.exists('00changelog.i.a'):
             indexfile = '00changelog.i.a'
         else:
             indexfile = '00changelog.i'

         datafile = '00changelog.d'
         revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
                                checkambig=True)

         if self._initempty:
             # changelogs don't benefit from generaldelta
             self.version &= ~revlog.FLAG_GENERALDELTA
             self._generaldelta = False

         # Delta chains for changelogs tend to be very small because entries
         # tend to be small and don't delta well with each. So disable delta
         # chains.
         self.storedeltachains = False

         self._realopener = opener
         self._delayed = False
         self._delaybuf = None
         self._divert = False
         self.filteredrevs = frozenset()

     def tip(self):
         """filtered version of revlog.tip"""
         for i in xrange(len(self) -1, -2, -1):
             if i not in self.filteredrevs:
                 return self.node(i)

     def __contains__(self, rev):
         """filtered version of revlog.__contains__"""
         return (0 <= rev < len(self)
                 and rev not in self.filteredrevs)

     def __iter__(self):
         """filtered version of revlog.__iter__"""
         if len(self.filteredrevs) == 0:
             return revlog.revlog.__iter__(self)

         def filterediter():
             for i in xrange(len(self)):
                 if i not in self.filteredrevs:
                     yield i

         return filterediter()

     def revs(self, start=0, stop=None):
         """filtered version of revlog.revs"""
         for i in super(changelog, self).revs(start, stop):
             if i not in self.filteredrevs:
                 yield i

     @util.propertycache
     def nodemap(self):
         # XXX need filtering too
         self.rev(self.node(0))
         return self._nodecache

     def reachableroots(self, minroot, heads, roots, includepath=False):
         return self.index.reachableroots2(minroot, heads, roots, includepath)

     def headrevs(self):
         if self.filteredrevs:
             try:
                 return self.index.headrevsfiltered(self.filteredrevs)
             # AttributeError covers non-c-extension environments and
             # old c extensions without filter handling.
             except AttributeError:
                 return self._headrevs()

         return super(changelog, self).headrevs()

     def strip(self, *args, **kwargs):
         # XXX make something better than assert
         # We can't expect proper strip behavior if we are filtered.
         assert not self.filteredrevs
         super(changelog, self).strip(*args, **kwargs)

     def rev(self, node):
         """filtered version of revlog.rev"""
         r = super(changelog, self).rev(node)
         if r in self.filteredrevs:
             raise error.FilteredLookupError(hex(node), self.indexfile,
                                             _('filtered node'))
         return r

     def node(self, rev):
         """filtered version of revlog.node"""
         if rev in self.filteredrevs:
             raise error.FilteredIndexError(rev)
         return super(changelog, self).node(rev)

     def linkrev(self, rev):
         """filtered version of revlog.linkrev"""
         if rev in self.filteredrevs:
             raise error.FilteredIndexError(rev)
         return super(changelog, self).linkrev(rev)

     def parentrevs(self, rev):
         """filtered version of revlog.parentrevs"""
         if rev in self.filteredrevs:
             raise error.FilteredIndexError(rev)
         return super(changelog, self).parentrevs(rev)

     def flags(self, rev):
         """filtered version of revlog.flags"""
         if rev in self.filteredrevs:
             raise error.FilteredIndexError(rev)
         return super(changelog, self).flags(rev)

     def delayupdate(self, tr):
         "delay visibility of index updates to other readers"

         if not self._delayed:
             if len(self) == 0:
                 self._divert = True
                 if self._realopener.exists(self.indexfile + '.a'):
                     self._realopener.unlink(self.indexfile + '.a')
                 self.opener = _divertopener(self._realopener, self.indexfile)
             else:
                 self._delaybuf = []
                 self.opener = _delayopener(self._realopener, self.indexfile,
                                            self._delaybuf)
         self._delayed = True
         tr.addpending('cl-%i' % id(self), self._writepending)
         tr.addfinalize('cl-%i' % id(self), self._finalize)

     def _finalize(self, tr):
         "finalize index updates"
         self._delayed = False
         self.opener = self._realopener
         # move redirected index data back into place
         if self._divert:
             assert not self._delaybuf
             tmpname = self.indexfile + ".a"
             nfile = self.opener.open(tmpname)
             nfile.close()
             self.opener.rename(tmpname, self.indexfile, checkambig=True)
         elif self._delaybuf:
             fp = self.opener(self.indexfile, 'a', checkambig=True)
             fp.write("".join(self._delaybuf))
             fp.close()
             self._delaybuf = None
         self._divert = False
         # split when we're done
         self.checkinlinesize(tr)

     def _writepending(self, tr):
         "create a file containing the unfinalized state for pretxnchangegroup"
         if self._delaybuf:
             # make a temporary copy of the index
             fp1 = self._realopener(self.indexfile)
             pendingfilename = self.indexfile + ".a"
             # register as a temp file to ensure cleanup on failure
             tr.registertmp(pendingfilename)
             # write existing data
             fp2 = self._realopener(pendingfilename, "w")
             fp2.write(fp1.read())
             # add pending data
             fp2.write("".join(self._delaybuf))
             fp2.close()
             # switch modes so finalize can simply rename
             self._delaybuf = None
             self._divert = True
             self.opener = _divertopener(self._realopener, self.indexfile)

         if self._divert:
             return True

         return False

     def checkinlinesize(self, tr, fp=None):
         if not self._delayed:
             revlog.revlog.checkinlinesize(self, tr, fp)

     def read(self, node):
         """Obtain data from a parsed changelog revision.

         Returns a 6-tuple of:

            - manifest node in binary
            - author/user as a localstr
            - date as a 2-tuple of (time, timezone)
            - list of files
            - commit message as a localstr
            - dict of extra metadata

         Unless you need to access all fields, consider calling
         ``changelogrevision`` instead, as it is faster for partial object
         access.
         """
         c = changelogrevision(self.revision(node))
         return (
             c.manifest,
             c.user,
             c.date,
             c.files,
             c.description,
             c.extra
         )

     def changelogrevision(self, nodeorrev):
         """Obtain a ``changelogrevision`` for a node or revision."""
         return changelogrevision(self.revision(nodeorrev))

     def readfiles(self, node):
         """
         short version of read that only returns the files modified by the cset
         """
         text = self.revision(node)
         if not text:
             return []
         last = text.index("\n\n")
         l = text[:last].split('\n')
         return l[3:]

     def add(self, manifest, files, desc, transaction, p1, p2,
             user, date=None, extra=None):
         # Convert to UTF-8 encoded bytestrings as the very first
         # thing: calling any method on a localstr object will turn it
         # into a str object and the cached UTF-8 string is thus lost.
         user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

         user = user.strip()
         # An empty username or a username with a "\n" will make the
         # revision text contain two "\n\n" sequences -> corrupt
         # repository since read cannot unpack the revision.
         if not user:
             raise error.RevlogError(_("empty username"))
         if "\n" in user:
             raise error.RevlogError(_("username %s contains a newline")
                                     % repr(user))

         desc = stripdesc(desc)

         if date:
             parseddate = "%d %d" % util.parsedate(date)
         else:
             parseddate = "%d %d" % util.makedate()
         if extra:
             branch = extra.get("branch")
             if branch in ("default", ""):
                 del extra["branch"]
             elif branch in (".", "null", "tip"):
                 raise error.RevlogError(_('the name \'%s\' is reserved')
                                         % branch)
         if extra:
             extra = encodeextra(extra)
             parseddate = "%s %s" % (parseddate, extra)
         l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
         text = "\n".join(l)
         return self.addrevision(text, transaction, len(self), p1, p2)

     def branchinfo(self, rev):
         """return the branch name and open/close state of a revision

         This function exists because creating a changectx object
         just to access this is costly."""
         extra = self.read(rev)[5]
         return encoding.tolocal(extra.get("branch")), 'close' in extra

     def _addrevision(self, node, rawtext, transaction, *args, **kwargs):
         # overlay over the standard revlog._addrevision to track the new
         # revision on the transaction.
         rev = len(self)
-        if node == wdirid: # nullid is checked in super method
-            raise error.RevlogError(_("%s: attempt to add wdir revision") %
-                                    (self.indexfile))
         node = super(changelog, self)._addrevision(node, rawtext, transaction,
                                                    *args, **kwargs)
         revs = transaction.changes.get('revs')
         if revs is not None:
             revs.add(rev)
         return node
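The add() method in the changelog.py hunk above joins its fields into the textual entry layout documented in changelogrevision.__new__: manifest hex, user, "time tz extra", the sorted file list, a blank line, then the description. A small stand-alone sketch of that round trip, with encodeextra reduced to sorted, colon-separated, \0-joined pairs and the escaping helpers elided:

    # Self-contained sketch of the changelog entry layout (not Mercurial's
    # own helpers): nodeid\n user\n time tz extra\n files...\n\n description
    def encodeextra(d):
        # keys sorted for a deterministic entry; real code escapes values
        return "\0".join('%s:%s' % (k, d[k]) for k in sorted(d))

    manifesthex = "a" * 40
    user = "Jane Doe <jane@example.com>"
    parseddate = "%d %d" % (1500000000, -7200)
    parseddate = "%s %s" % (parseddate, encodeextra({'branch': 'stable'}))
    files = ["b.txt", "a.txt"]
    desc = "example commit"

    lines = [manifesthex, user, parseddate] + sorted(files) + ["", desc]
    text = "\n".join(lines)

    # Re-split the way readfiles() does: everything after the third header
    # line and before the blank line is the file list.
    header, description = text.split("\n\n", 1)
    assert header.split("\n")[3:] == ["a.txt", "b.txt"]
    assert description == desc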
@@ -1,2218 +1,2222
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullid,
28 nullid,
29 nullrev,
29 nullrev,
30 wdirhex,
30 wdirhex,
31 wdirid,
31 wdirid,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from . import (
35 from . import (
36 ancestor,
36 ancestor,
37 error,
37 error,
38 mdiff,
38 mdiff,
39 policy,
39 policy,
40 pycompat,
40 pycompat,
41 templatefilters,
41 templatefilters,
42 util,
42 util,
43 )
43 )
44
44
45 parsers = policy.importmod(r'parsers')
45 parsers = policy.importmod(r'parsers')
46
46
47 # Aliased for performance.
47 # Aliased for performance.
48 _zlibdecompress = zlib.decompress
48 _zlibdecompress = zlib.decompress
49
49
50 # revlog header flags
50 # revlog header flags
51 REVLOGV0 = 0
51 REVLOGV0 = 0
52 REVLOGV1 = 1
52 REVLOGV1 = 1
53 # Dummy value until file format is finalized.
53 # Dummy value until file format is finalized.
54 # Reminder: change the bounds check in revlog.__init__ when this is changed.
54 # Reminder: change the bounds check in revlog.__init__ when this is changed.
55 REVLOGV2 = 0xDEAD
55 REVLOGV2 = 0xDEAD
56 FLAG_INLINE_DATA = (1 << 16)
56 FLAG_INLINE_DATA = (1 << 16)
57 FLAG_GENERALDELTA = (1 << 17)
57 FLAG_GENERALDELTA = (1 << 17)
58 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
58 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
59 REVLOG_DEFAULT_FORMAT = REVLOGV1
59 REVLOG_DEFAULT_FORMAT = REVLOGV1
60 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
60 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
61 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
61 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
62 REVLOGV2_FLAGS = REVLOGV1_FLAGS
62 REVLOGV2_FLAGS = REVLOGV1_FLAGS
63
63
64 # revlog index flags
64 # revlog index flags
65 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
65 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
66 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
66 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
67 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
67 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
68 REVIDX_DEFAULT_FLAGS = 0
68 REVIDX_DEFAULT_FLAGS = 0
69 # stable order in which flags need to be processed and their processors applied
69 # stable order in which flags need to be processed and their processors applied
70 REVIDX_FLAGS_ORDER = [
70 REVIDX_FLAGS_ORDER = [
71 REVIDX_ISCENSORED,
71 REVIDX_ISCENSORED,
72 REVIDX_ELLIPSIS,
72 REVIDX_ELLIPSIS,
73 REVIDX_EXTSTORED,
73 REVIDX_EXTSTORED,
74 ]
74 ]
75 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
75 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
76
76
77 # max size of revlog with inline data
77 # max size of revlog with inline data
78 _maxinline = 131072
78 _maxinline = 131072
79 _chunksize = 1048576
79 _chunksize = 1048576
80
80
81 RevlogError = error.RevlogError
81 RevlogError = error.RevlogError
82 LookupError = error.LookupError
82 LookupError = error.LookupError
83 CensoredNodeError = error.CensoredNodeError
83 CensoredNodeError = error.CensoredNodeError
84 ProgrammingError = error.ProgrammingError
84 ProgrammingError = error.ProgrammingError
85
85
86 # Store flag processors (cf. 'addflagprocessor()' to register)
86 # Store flag processors (cf. 'addflagprocessor()' to register)
87 _flagprocessors = {
87 _flagprocessors = {
88 REVIDX_ISCENSORED: None,
88 REVIDX_ISCENSORED: None,
89 }
89 }
90
90
91 def addflagprocessor(flag, processor):
91 def addflagprocessor(flag, processor):
92 """Register a flag processor on a revision data flag.
92 """Register a flag processor on a revision data flag.
93
93
94 Invariant:
94 Invariant:
95 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
95 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
96 - Only one flag processor can be registered on a specific flag.
96 - Only one flag processor can be registered on a specific flag.
97 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
97 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
98 following signatures:
98 following signatures:
99 - (read) f(self, rawtext) -> text, bool
99 - (read) f(self, rawtext) -> text, bool
100 - (write) f(self, text) -> rawtext, bool
100 - (write) f(self, text) -> rawtext, bool
101 - (raw) f(self, rawtext) -> bool
101 - (raw) f(self, rawtext) -> bool
102 "text" is presented to the user. "rawtext" is stored in revlog data, not
102 "text" is presented to the user. "rawtext" is stored in revlog data, not
103 directly visible to the user.
103 directly visible to the user.
104 The boolean returned by these transforms is used to determine whether
104 The boolean returned by these transforms is used to determine whether
105 the returned text can be used for hash integrity checking. For example,
105 the returned text can be used for hash integrity checking. For example,
106 if "write" returns False, then "text" is used to generate hash. If
106 if "write" returns False, then "text" is used to generate hash. If
107 "write" returns True, that basically means "rawtext" returned by "write"
107 "write" returns True, that basically means "rawtext" returned by "write"
108 should be used to generate hash. Usually, "write" and "read" return
108 should be used to generate hash. Usually, "write" and "read" return
109 different booleans. And "raw" returns a same boolean as "write".
109 different booleans. And "raw" returns a same boolean as "write".
110
110
111 Note: The 'raw' transform is used for changegroup generation and in some
111 Note: The 'raw' transform is used for changegroup generation and in some
112 debug commands. In this case the transform only indicates whether the
112 debug commands. In this case the transform only indicates whether the
113 contents can be used for hash integrity checks.
113 contents can be used for hash integrity checks.
114 """
114 """
115 if not flag & REVIDX_KNOWN_FLAGS:
115 if not flag & REVIDX_KNOWN_FLAGS:
116 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
116 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
117 raise ProgrammingError(msg)
117 raise ProgrammingError(msg)
118 if flag not in REVIDX_FLAGS_ORDER:
118 if flag not in REVIDX_FLAGS_ORDER:
119 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
119 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
120 raise ProgrammingError(msg)
120 raise ProgrammingError(msg)
121 if flag in _flagprocessors:
121 if flag in _flagprocessors:
122 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
122 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
123 raise error.Abort(msg)
123 raise error.Abort(msg)
124 _flagprocessors[flag] = processor
124 _flagprocessors[flag] = processor
125
125
126 def getoffset(q):
126 def getoffset(q):
127 return int(q >> 16)
127 return int(q >> 16)
128
128
129 def gettype(q):
129 def gettype(q):
130 return int(q & 0xFFFF)
130 return int(q & 0xFFFF)
131
131
132 def offset_type(offset, type):
132 def offset_type(offset, type):
133 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
133 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
134 raise ValueError('unknown revlog index flags')
134 raise ValueError('unknown revlog index flags')
135 return int(int(offset) << 16 | type)
135 return int(int(offset) << 16 | type)
136
136
137 _nullhash = hashlib.sha1(nullid)
137 _nullhash = hashlib.sha1(nullid)
138
138
139 def hash(text, p1, p2):
139 def hash(text, p1, p2):
140 """generate a hash from the given text and its parent hashes
140 """generate a hash from the given text and its parent hashes
141
141
142 This hash combines both the current file contents and its history
142 This hash combines both the current file contents and its history
143 in a manner that makes it easy to distinguish nodes with the same
143 in a manner that makes it easy to distinguish nodes with the same
144 content in the revision graph.
144 content in the revision graph.
145 """
145 """
146 # As of now, if one of the parent node is null, p2 is null
146 # As of now, if one of the parent node is null, p2 is null
147 if p2 == nullid:
147 if p2 == nullid:
148 # deep copy of a hash is faster than creating one
148 # deep copy of a hash is faster than creating one
149 s = _nullhash.copy()
149 s = _nullhash.copy()
150 s.update(p1)
150 s.update(p1)
151 else:
151 else:
152 # none of the parent nodes are nullid
152 # none of the parent nodes are nullid
153 if p1 < p2:
153 if p1 < p2:
154 a = p1
154 a = p1
155 b = p2
155 b = p2
156 else:
156 else:
157 a = p2
157 a = p2
158 b = p1
158 b = p1
159 s = hashlib.sha1(a)
159 s = hashlib.sha1(a)
160 s.update(b)
160 s.update(b)
161 s.update(text)
161 s.update(text)
162 return s.digest()
162 return s.digest()
163
163
164 # index v0:
164 # index v0:
165 # 4 bytes: offset
165 # 4 bytes: offset
166 # 4 bytes: compressed length
166 # 4 bytes: compressed length
167 # 4 bytes: base rev
167 # 4 bytes: base rev
168 # 4 bytes: link rev
168 # 4 bytes: link rev
169 # 20 bytes: parent 1 nodeid
169 # 20 bytes: parent 1 nodeid
170 # 20 bytes: parent 2 nodeid
170 # 20 bytes: parent 2 nodeid
171 # 20 bytes: nodeid
171 # 20 bytes: nodeid
172 indexformatv0 = struct.Struct(">4l20s20s20s")
172 indexformatv0 = struct.Struct(">4l20s20s20s")
173 indexformatv0_pack = indexformatv0.pack
173 indexformatv0_pack = indexformatv0.pack
174 indexformatv0_unpack = indexformatv0.unpack
174 indexformatv0_unpack = indexformatv0.unpack
175
175
176 class revlogoldio(object):
176 class revlogoldio(object):
177 def __init__(self):
177 def __init__(self):
178 self.size = indexformatv0.size
178 self.size = indexformatv0.size
179
179
180 def parseindex(self, data, inline):
180 def parseindex(self, data, inline):
181 s = self.size
181 s = self.size
182 index = []
182 index = []
183 nodemap = {nullid: nullrev}
183 nodemap = {nullid: nullrev}
184 n = off = 0
184 n = off = 0
185 l = len(data)
185 l = len(data)
186 while off + s <= l:
186 while off + s <= l:
187 cur = data[off:off + s]
187 cur = data[off:off + s]
188 off += s
188 off += s
189 e = indexformatv0_unpack(cur)
189 e = indexformatv0_unpack(cur)
190 # transform to revlogv1 format
190 # transform to revlogv1 format
191 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
191 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
192 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
192 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
193 index.append(e2)
193 index.append(e2)
194 nodemap[e[6]] = n
194 nodemap[e[6]] = n
195 n += 1
195 n += 1
196
196
197 # add the magic null revision at -1
197 # add the magic null revision at -1
198 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
198 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
199
199
200 return index, nodemap, None
200 return index, nodemap, None
201
201
202 def packentry(self, entry, node, version, rev):
202 def packentry(self, entry, node, version, rev):
203 if gettype(entry[0]):
203 if gettype(entry[0]):
204 raise RevlogError(_('index entry flags need revlog version 1'))
204 raise RevlogError(_('index entry flags need revlog version 1'))
205 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
205 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
206 node(entry[5]), node(entry[6]), entry[7])
206 node(entry[5]), node(entry[6]), entry[7])
207 return indexformatv0_pack(*e2)
207 return indexformatv0_pack(*e2)
208
208
209 # index ng:
209 # index ng:
210 # 6 bytes: offset
210 # 6 bytes: offset
211 # 2 bytes: flags
211 # 2 bytes: flags
212 # 4 bytes: compressed length
212 # 4 bytes: compressed length
213 # 4 bytes: uncompressed length
213 # 4 bytes: uncompressed length
214 # 4 bytes: base rev
214 # 4 bytes: base rev
215 # 4 bytes: link rev
215 # 4 bytes: link rev
216 # 4 bytes: parent 1 rev
216 # 4 bytes: parent 1 rev
217 # 4 bytes: parent 2 rev
217 # 4 bytes: parent 2 rev
218 # 32 bytes: nodeid
218 # 32 bytes: nodeid
219 indexformatng = struct.Struct(">Qiiiiii20s12x")
219 indexformatng = struct.Struct(">Qiiiiii20s12x")
220 indexformatng_pack = indexformatng.pack
220 indexformatng_pack = indexformatng.pack
221 versionformat = struct.Struct(">I")
221 versionformat = struct.Struct(">I")
222 versionformat_pack = versionformat.pack
222 versionformat_pack = versionformat.pack
223 versionformat_unpack = versionformat.unpack
223 versionformat_unpack = versionformat.unpack
224
224
225 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
225 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
226 # signed integer)
226 # signed integer)
227 _maxentrysize = 0x7fffffff
227 _maxentrysize = 0x7fffffff
228
228
229 class revlogio(object):
229 class revlogio(object):
230 def __init__(self):
230 def __init__(self):
231 self.size = indexformatng.size
231 self.size = indexformatng.size
232
232
233 def parseindex(self, data, inline):
233 def parseindex(self, data, inline):
234 # call the C implementation to parse the index data
234 # call the C implementation to parse the index data
235 index, cache = parsers.parse_index2(data, inline)
235 index, cache = parsers.parse_index2(data, inline)
236 return index, getattr(index, 'nodemap', None), cache
236 return index, getattr(index, 'nodemap', None), cache
237
237
238 def packentry(self, entry, node, version, rev):
238 def packentry(self, entry, node, version, rev):
239 p = indexformatng_pack(*entry)
239 p = indexformatng_pack(*entry)
240 if rev == 0:
240 if rev == 0:
241 p = versionformat_pack(version) + p[4:]
241 p = versionformat_pack(version) + p[4:]
242 return p
242 return p
243
243
244 class revlog(object):
244 class revlog(object):
245 """
245 """
246 the underlying revision storage object
246 the underlying revision storage object
247
247
248 A revlog consists of two parts, an index and the revision data.
248 A revlog consists of two parts, an index and the revision data.
249
249
250 The index is a file with a fixed record size containing
250 The index is a file with a fixed record size containing
251 information on each revision, including its nodeid (hash), the
251 information on each revision, including its nodeid (hash), the
252 nodeids of its parents, the position and offset of its data within
252 nodeids of its parents, the position and offset of its data within
253 the data file, and the revision it's based on. Finally, each entry
253 the data file, and the revision it's based on. Finally, each entry
254 contains a linkrev entry that can serve as a pointer to external
254 contains a linkrev entry that can serve as a pointer to external
255 data.
255 data.
256
256
257 The revision data itself is a linear collection of data chunks.
257 The revision data itself is a linear collection of data chunks.
258 Each chunk represents a revision and is usually represented as a
258 Each chunk represents a revision and is usually represented as a
259 delta against the previous chunk. To bound lookup time, runs of
259 delta against the previous chunk. To bound lookup time, runs of
260 deltas are limited to about 2 times the length of the original
260 deltas are limited to about 2 times the length of the original
261 version data. This makes retrieval of a version proportional to
261 version data. This makes retrieval of a version proportional to
262 its size, or O(1) relative to the number of revisions.
262 its size, or O(1) relative to the number of revisions.
263
263
264 Both pieces of the revlog are written to in an append-only
264 Both pieces of the revlog are written to in an append-only
265 fashion, which means we never need to rewrite a file to insert or
265 fashion, which means we never need to rewrite a file to insert or
266 remove data, and can use some simple techniques to avoid the need
266 remove data, and can use some simple techniques to avoid the need
267 for locking while reading.
267 for locking while reading.
268
268
269 If checkambig, indexfile is opened with checkambig=True at
269 If checkambig, indexfile is opened with checkambig=True at
270 writing, to avoid file stat ambiguity.
270 writing, to avoid file stat ambiguity.
271 """
271 """
272 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
272 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
273 """
273 """
274 create a revlog object
274 create a revlog object
275
275
276 opener is a function that abstracts the file opening operation
276 opener is a function that abstracts the file opening operation
277 and can be used to implement COW semantics or the like.
277 and can be used to implement COW semantics or the like.
278 """
278 """
279 self.indexfile = indexfile
279 self.indexfile = indexfile
280 self.datafile = datafile or (indexfile[:-2] + ".d")
280 self.datafile = datafile or (indexfile[:-2] + ".d")
281 self.opener = opener
281 self.opener = opener
282 # When True, indexfile is opened with checkambig=True at writing, to
282 # When True, indexfile is opened with checkambig=True at writing, to
283 # avoid file stat ambiguity.
283 # avoid file stat ambiguity.
284 self._checkambig = checkambig
284 self._checkambig = checkambig
285 # 3-tuple of (node, rev, text) for a raw revision.
285 # 3-tuple of (node, rev, text) for a raw revision.
286 self._cache = None
286 self._cache = None
287 # Maps rev to chain base rev.
287 # Maps rev to chain base rev.
288 self._chainbasecache = util.lrucachedict(100)
288 self._chainbasecache = util.lrucachedict(100)
289 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
289 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
290 self._chunkcache = (0, '')
290 self._chunkcache = (0, '')
291 # How much data to read and cache into the raw revlog data cache.
291 # How much data to read and cache into the raw revlog data cache.
292 self._chunkcachesize = 65536
292 self._chunkcachesize = 65536
293 self._maxchainlen = None
293 self._maxchainlen = None
294 self._aggressivemergedeltas = False
294 self._aggressivemergedeltas = False
295 self.index = []
295 self.index = []
296 # Mapping of partial identifiers to full nodes.
296 # Mapping of partial identifiers to full nodes.
297 self._pcache = {}
297 self._pcache = {}
298 # Mapping of revision integer to full node.
298 # Mapping of revision integer to full node.
299 self._nodecache = {nullid: nullrev}
299 self._nodecache = {nullid: nullrev}
300 self._nodepos = None
300 self._nodepos = None
301 self._compengine = 'zlib'
301 self._compengine = 'zlib'
302 self._maxdeltachainspan = -1
302 self._maxdeltachainspan = -1
303
303
304 v = REVLOG_DEFAULT_VERSION
304 v = REVLOG_DEFAULT_VERSION
305 opts = getattr(opener, 'options', None)
305 opts = getattr(opener, 'options', None)
306 if opts is not None:
306 if opts is not None:
307 if 'revlogv2' in opts:
307 if 'revlogv2' in opts:
308 # version 2 revlogs always use generaldelta.
308 # version 2 revlogs always use generaldelta.
309 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
309 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
310 elif 'revlogv1' in opts:
310 elif 'revlogv1' in opts:
311 if 'generaldelta' in opts:
311 if 'generaldelta' in opts:
312 v |= FLAG_GENERALDELTA
312 v |= FLAG_GENERALDELTA
313 else:
313 else:
314 v = 0
314 v = 0
315 if 'chunkcachesize' in opts:
315 if 'chunkcachesize' in opts:
316 self._chunkcachesize = opts['chunkcachesize']
316 self._chunkcachesize = opts['chunkcachesize']
317 if 'maxchainlen' in opts:
317 if 'maxchainlen' in opts:
318 self._maxchainlen = opts['maxchainlen']
318 self._maxchainlen = opts['maxchainlen']
319 if 'aggressivemergedeltas' in opts:
319 if 'aggressivemergedeltas' in opts:
320 self._aggressivemergedeltas = opts['aggressivemergedeltas']
320 self._aggressivemergedeltas = opts['aggressivemergedeltas']
321 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
321 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
322 if 'compengine' in opts:
322 if 'compengine' in opts:
323 self._compengine = opts['compengine']
323 self._compengine = opts['compengine']
324 if 'maxdeltachainspan' in opts:
324 if 'maxdeltachainspan' in opts:
325 self._maxdeltachainspan = opts['maxdeltachainspan']
325 self._maxdeltachainspan = opts['maxdeltachainspan']
326
326
327 if self._chunkcachesize <= 0:
327 if self._chunkcachesize <= 0:
328 raise RevlogError(_('revlog chunk cache size %r is not greater '
328 raise RevlogError(_('revlog chunk cache size %r is not greater '
329 'than 0') % self._chunkcachesize)
329 'than 0') % self._chunkcachesize)
330 elif self._chunkcachesize & (self._chunkcachesize - 1):
330 elif self._chunkcachesize & (self._chunkcachesize - 1):
331 raise RevlogError(_('revlog chunk cache size %r is not a power '
331 raise RevlogError(_('revlog chunk cache size %r is not a power '
332 'of 2') % self._chunkcachesize)
332 'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
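    # Illustrative usage of revs() (hypothetical five-revision revlog):
    # revs() yields 0, 1, 2, 3, 4; revs(stop=2) yields 0, 1, 2 (the stop
    # bound is made inclusive above); and revs(start=3, stop=1) yields
    # 3, 2, 1, stepping backwards because start > stop.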

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
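    # Illustrative summary, inferred from the accessors below: each index
    # entry behaves like an 8-tuple along the lines of
    #   (offset_and_flags, compressed_length, raw_length, delta_base,
    #    linkrev, parent_1_rev, parent_2_rev, node)
    # which is why positions 0 through 7 are indexed throughout this class.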
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            return self.index[rev][5:7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
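    # Illustrative example with hypothetical data: without generaldelta, a
    # revision 5 whose chain base is rev 2 gives _deltachain(5) ==
    # ([2, 3, 4, 5], False); with generaldelta the walk follows each entry's
    # recorded delta parent instead, so the chain may skip revisions.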

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break
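    # Illustrative note: because ``seen`` is seeded with ``revs`` and the
    # scan starts at min(revs) + 1, every candidate revision is examined
    # exactly once, keeping this a single linear pass over the revlog.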

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
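    # Illustrative example (hypothetical DAG): with parents 0 <- 1 <- 2 and
    # a second child 1 <- 3, neither rev 2 nor rev 3 has a child, so
    # _headrevs() returns [2, 3].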

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

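    # Illustrative note: the working directory pseudo-node consists of
    # twenty 0xff bytes, so its hex form is forty 'f' characters; any prefix
    # made up solely of 'f's (e.g. "ffff") could therefore refer to wdir,
    # which is what the ``maybewdir`` handling above accounts for.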
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
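    # Illustrative arithmetic with hypothetical numbers: given a 64 KiB
    # cache (cachesize = 65536), a request for offset=70000, length=1000
    # rounds down to realoffset = 65536 and up to reallength = 65536, so a
    # single aligned 64 KiB window is read and the requested slice is
    # carved out of it with util.buffer.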

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
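    # Illustrative note: in an inline revlog the data for each revision is
    # stored immediately after its index entry in the same file, so the
    # flat data offsets computed above must be shifted by (rev + 1)
    # index-entry sizes (self._io.size bytes each).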
1244
1244
1245 def _chunk(self, rev, df=None):
1245 def _chunk(self, rev, df=None):
1246 """Obtain a single decompressed chunk for a revision.
1246 """Obtain a single decompressed chunk for a revision.
1247
1247
1248 Accepts an integer revision and an optional already-open file handle
1248 Accepts an integer revision and an optional already-open file handle
1249 to be used for reading. If used, the seek position of the file will not
1249 to be used for reading. If used, the seek position of the file will not
1250 be preserved.
1250 be preserved.
1251
1251
1252 Returns a str holding uncompressed data for the requested revision.
1252 Returns a str holding uncompressed data for the requested revision.
1253 """
1253 """
1254 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1254 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1255
1255
1256 def _chunks(self, revs, df=None):
1256 def _chunks(self, revs, df=None):
1257 """Obtain decompressed chunks for the specified revisions.
1257 """Obtain decompressed chunks for the specified revisions.
1258
1258
1259 Accepts an iterable of numeric revisions that are assumed to be in
1259 Accepts an iterable of numeric revisions that are assumed to be in
1260 ascending order. Also accepts an optional already-open file handle
1260 ascending order. Also accepts an optional already-open file handle
1261 to be used for reading. If used, the seek position of the file will
1261 to be used for reading. If used, the seek position of the file will
1262 not be preserved.
1262 not be preserved.
1263
1263
1264 This function is similar to calling ``self._chunk()`` multiple times,
1264 This function is similar to calling ``self._chunk()`` multiple times,
1265 but is faster.
1265 but is faster.
1266
1266
1267 Returns a list with decompressed data for each requested revision.
1267 Returns a list with decompressed data for each requested revision.
1268 """
1268 """
1269 if not revs:
1269 if not revs:
1270 return []
1270 return []
1271 start = self.start
1271 start = self.start
1272 length = self.length
1272 length = self.length
1273 inline = self._inline
1273 inline = self._inline
1274 iosize = self._io.size
1274 iosize = self._io.size
1275 buffer = util.buffer
1275 buffer = util.buffer
1276
1276
1277 l = []
1277 l = []
1278 ladd = l.append
1278 ladd = l.append
1279
1279
1280 try:
1280 try:
1281 offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
1281 offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
1282 except OverflowError:
1282 except OverflowError:
1283 # issue4215 - we can't cache a run of chunks greater than
1283 # issue4215 - we can't cache a run of chunks greater than
1284 # 2G on Windows
1284 # 2G on Windows
1285 return [self._chunk(rev, df=df) for rev in revs]
1285 return [self._chunk(rev, df=df) for rev in revs]
1286
1286
1287 decomp = self.decompress
1287 decomp = self.decompress
1288 for rev in revs:
1288 for rev in revs:
1289 chunkstart = start(rev)
1289 chunkstart = start(rev)
1290 if inline:
1290 if inline:
1291 chunkstart += (rev + 1) * iosize
1291 chunkstart += (rev + 1) * iosize
1292 chunklength = length(rev)
1292 chunklength = length(rev)
1293 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1293 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1294
1294
1295 return l
1295 return l
1296
1296
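    # For illustration only, a minimal sketch of how the chunk helpers fit
    # together (``rl`` is a revlog instance): a revision's fulltext is its
    # delta chain's base patched with every following delta, which is what
    # revision() does below via _chunks():
    #
    #   chain, stopped = rl._deltachain(rev)            # e.g. [2, 5, 7]
    #   bins = rl._chunks(chain)                        # decompressed blobs
    #   text = mdiff.patches(bytes(bins[0]), bins[1:])  # base + stacked deltas
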
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

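    # Sketch of the two layouts handled by deltaparent() (revs are
    # illustrative): without generaldelta the base of rev N is implicitly
    # rev N - 1, so chains are contiguous runs ... -> N - 2 -> N - 1 -> N;
    # with generaldelta the stored base can be any earlier rev (typically a
    # parent), e.g. deltaparent(7) == 2 when rev 7 was deltaed against rev 2.
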
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

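    # For example, in a generaldelta revlog where deltaparent(6) == 5,
    # revdiff(5, 6) can return the stored chunk for rev 6 verbatim, while
    # revdiff(3, 6) has to materialize both raw fulltexts and diff them
    # with mdiff.textdiff.
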
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

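    # Minimal usage sketch (``rl`` and ``node`` are illustrative): both
    # calls return the same fulltext, the second merely skipping the
    # node -> rev lookup:
    #
    #   text = rl.revision(node)          # 20-byte binary nodeid
    #   text = rl.revision(rl.rev(node))  # integer revision number
    #
    # Passing raw=True returns the stored rawtext without running the read
    # flag transforms, e.g. when forwarding data into a changegroup.
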
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

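    # A flag processor, as consumed above, is a (read, write, raw) triple
    # registered for a single flag bit. An illustrative no-op processor
    # (hypothetical names; registration is assumed to go through
    # addflagprocessor()):
    #
    #   def _readnoop(self, text):
    #       return text, True   # (transformed text, validatehash)
    #   def _writenoop(self, text):
    #       return text, True
    #   def _rawnoop(self, text):
    #       return True         # raw transform only reports validatehash
    #
    #   addflagprocessor(REVIDX_SOMEFLAG, (_readnoop, _writenoop, _rawnoop))
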
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

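    # Note on the offset math shared with _getsegmentforrevs(): in an inline
    # revlog every index entry is immediately followed by its data chunk, so
    # the data for rev r lives at start(r) + (r + 1) * self._io.size. The
    # conversion above simply rewrites the index without that skew and clears
    # FLAG_INLINE_DATA so readers stop applying it.
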
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

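    # Header dispatch summary for the branches above (first byte of chunk):
    #   'x'   - zlib-compressed data (a zlib stream itself starts with 'x')
    #   '\0'  - data stored verbatim whose first byte happens to be NUL
    #   'u'   - uncompressed data behind a one-byte 'u' marker to strip
    #   other - resolved through util.compengines.forrevlogheader(), e.g.
    #           the zstd engine's revlog header
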
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = dist # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (dist > maxdist or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

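    # Worked example of the bounds above, for textlen = 1000: a candidate
    # delta is rejected if its compressed size l exceeds 1000 (a delta
    # larger than the text it encodes is useless), if the chain's summed
    # delta size exceeds 2000, if the chain would grow past _maxchainlen
    # (when set), or if the read span dist exceeds
    # max(_maxdeltachainspan, 4000); when _maxdeltachainspan is unset,
    # maxdist defaults to dist itself, disabling the span check.
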
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta() will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

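    # Layout of the index entry tuple ``e`` built above:
    #   (offset_type(offset, flags), compressed length, fulltext length,
    #    delta base rev, linkrev, p1 rev, p2 rev, nodeid)
    # offset_type() packs the data offset and the 16-bit flags field into a
    # single integer, which is why readers unpack ``entry[0] >> 16`` for the
    # offset and ``entry[0] & 0xffff`` for the flags.
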
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

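    # Each ``chunkdata`` consumed above is a dict produced by the
    # changegroup reader: 'node', 'p1', 'p2', 'cs' (the changeset node fed
    # to linkmapper), 'deltabase', 'delta' and 'flags'. The first delta
    # applies against a node already present in this revlog; later ones may
    # chain off whatever ``chain`` was just added.
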
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

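    # Tiny worked example (illustrative linkrevs): take revs 0..3 with
    # parents 1->0, 2->1, 3->1, heads {2, 3}, and linkrevs [0, 1, 3, 1].
    # getstrippoint(2) walks back from the heads: rev 3 (linkrev 1 < 2) is
    # recorded in brokenrevs, rev 2 (linkrev 3 >= 2) clears the pending
    # set, and the result is (2, {3}): truncate at rev 2, with rev 3 as
    # collateral damage whose linkrev sat below minlink.
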
2023 def strip(self, minlink, transaction):
2027 def strip(self, minlink, transaction):
2024 """truncate the revlog on the first revision with a linkrev >= minlink
2028 """truncate the revlog on the first revision with a linkrev >= minlink
2025
2029
2026 This function is called when we're stripping revision minlink and
2030 This function is called when we're stripping revision minlink and
2027 its descendants from the repository.
2031 its descendants from the repository.
2028
2032
2029 We have to remove all revisions with linkrev >= minlink, because
2033 We have to remove all revisions with linkrev >= minlink, because
2030 the equivalent changelog revisions will be renumbered after the
2034 the equivalent changelog revisions will be renumbered after the
2031 strip.
2035 strip.
2032
2036
2033 So we truncate the revlog on the first of these revisions, and
2037 So we truncate the revlog on the first of these revisions, and
2034 trust that the caller has saved the revisions that shouldn't be
2038 trust that the caller has saved the revisions that shouldn't be
2035 removed and that it'll re-add them after this truncation.
2039 removed and that it'll re-add them after this truncation.
2036 """
2040 """
2037 if len(self) == 0:
2041 if len(self) == 0:
2038 return
2042 return
2039
2043
2040 rev, _ = self.getstrippoint(minlink)
2044 rev, _ = self.getstrippoint(minlink)
2041 if rev == len(self):
2045 if rev == len(self):
2042 return
2046 return
2043
2047
2044 # first truncate the files on disk
2048 # first truncate the files on disk
2045 end = self.start(rev)
2049 end = self.start(rev)
2046 if not self._inline:
2050 if not self._inline:
2047 transaction.add(self.datafile, end)
2051 transaction.add(self.datafile, end)
2048 end = rev * self._io.size
2052 end = rev * self._io.size
2049 else:
2053 else:
2050 end += rev * self._io.size
2054 end += rev * self._io.size
2051
2055
2052 transaction.add(self.indexfile, end)
2056 transaction.add(self.indexfile, end)
2053
2057
2054 # then reset internal state in memory to forget those revisions
2058 # then reset internal state in memory to forget those revisions
2055 self._cache = None
2059 self._cache = None
2056 self._chaininfocache = {}
2060 self._chaininfocache = {}
2057 self._chunkclear()
2061 self._chunkclear()
2058 for x in xrange(rev, len(self)):
2062 for x in xrange(rev, len(self)):
2059 del self.nodemap[self.node(x)]
2063 del self.nodemap[self.node(x)]
2060
2064
2061 del self.index[rev:-1]
2065 del self.index[rev:-1]
2062
2066
2063 def checksize(self):
2067 def checksize(self):
2064 expected = 0
2068 expected = 0
2065 if len(self):
2069 if len(self):
2066 expected = max(0, self.end(len(self) - 1))
2070 expected = max(0, self.end(len(self) - 1))
2067
2071
2068 try:
2072 try:
2069 f = self.opener(self.datafile)
2073 f = self.opener(self.datafile)
2070 f.seek(0, 2)
2074 f.seek(0, 2)
2071 actual = f.tell()
2075 actual = f.tell()
2072 f.close()
2076 f.close()
2073 dd = actual - expected
2077 dd = actual - expected
2074 except IOError as inst:
2078 except IOError as inst:
2075 if inst.errno != errno.ENOENT:
2079 if inst.errno != errno.ENOENT:
2076 raise
2080 raise
2077 dd = 0
2081 dd = 0
2078
2082
2079 try:
2083 try:
2080 f = self.opener(self.indexfile)
2084 f = self.opener(self.indexfile)
2081 f.seek(0, 2)
2085 f.seek(0, 2)
2082 actual = f.tell()
2086 actual = f.tell()
2083 f.close()
2087 f.close()
2084 s = self._io.size
2088 s = self._io.size
2085 i = max(0, actual // s)
2089 i = max(0, actual // s)
2086 di = actual - (i * s)
2090 di = actual - (i * s)
2087 if self._inline:
2091 if self._inline:
2088 databytes = 0
2092 databytes = 0
2089 for r in self:
2093 for r in self:
2090 databytes += max(0, self.length(r))
2094 databytes += max(0, self.length(r))
2091 dd = 0
2095 dd = 0
2092 di = actual - len(self) * s - databytes
2096 di = actual - len(self) * s - databytes
2093 except IOError as inst:
2097 except IOError as inst:
2094 if inst.errno != errno.ENOENT:
2098 if inst.errno != errno.ENOENT:
2095 raise
2099 raise
2096 di = 0
2100 di = 0
2097
2101
2098 return (dd, di)
2102 return (dd, di)
2099
2103
2100 def files(self):
2104 def files(self):
2101 res = [self.indexfile]
2105 res = [self.indexfile]
2102 if not self._inline:
2106 if not self._inline:
2103 res.append(self.datafile)
2107 res.append(self.datafile)
2104 return res
2108 return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = {'always', 'samerevs', 'never'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        that when converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a parent
        of the revision.

        In addition to the delta policy, the ``aggressivemergedeltas``
        argument controls whether to compute deltas against both parents for
        merges. If it is not set, the destination revlog's current setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
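            # Because plain ``or`` is used here, an explicit False behaves the
            # same as None: both fall back to the destination revlog's
            # previous setting.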

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)
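                # Exactly one of cachedelta and rawtext is now set: either the
                # stored delta chunk is passed through, or the full raw text
                # is resolved so the destination can compute its own delta.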

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
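
    # A minimal usage sketch (illustrative only; ``srcrevlog``, ``destrevlog``
    # and the open transaction ``tr`` are assumed to exist and are not part of
    # this module). Forcing every delta to be recomputed, e.g. after a change
    # to the delta algorithm, would look roughly like:
    #
    #     def onrevision(srcrl, rev, node):
    #         pass  # invoked once per copied revision, e.g. for progress
    #
    #     srcrevlog.clone(tr, destrevlog,
    #                     addrevisioncb=onrevision,
    #                     deltareuse=srcrevlog.DELTAREUSENEVER)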