changelog: make sure datafile is 00changelog.d (API)...
Jun Wu
r32307:3caec778 default
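Context for the change below: changelog may open a pending index named '00changelog.i.a' (the trypending branch in changelog.__init__), while revlog used to derive the data file name from the index file name. A minimal sketch of why that derivation goes wrong for the pending index:

# old derivation in revlog.__init__: self.datafile = indexfile[:-2] + ".d"
indexfile = '00changelog.i.a'        # pending index used with trypending
print(indexfile[:-2] + '.d')         # '00changelog.i.d' - not the real data file

Passing datafile='00changelog.d' explicitly keeps revision data reads on the canonical data file no matter which index file was opened.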
mercurial/changelog.py
@@ -1,541 +1,543 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)

from . import (
    encoding,
    error,
    revlog,
    util,
)

_defaultextra = {'branch': 'default'}

def _string_escape(text):
    """
    >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
    >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == util.unescapestr(res)
    True
    """
    # subset of the string_escape codec
    text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
    return text.replace('\0', '\\0')

def decodeextra(text):
    """
    >>> sorted(decodeextra(encodeextra({'foo': 'bar', 'baz': chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({'foo': 'bar',
    ...                                 'baz': chr(92) + chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split('\0'):
        if l:
            if '\\0' in l:
                # fix up \0 without getting into trouble with \\0
                l = l.replace('\\\\', '\\\\\n')
                l = l.replace('\\0', '\0')
                l = l.replace('\n', '')
            k, v = util.unescapestr(l).split(':', 1)
            extra[k] = v
    return extra

def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
    return "\0".join(items)

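For reference, a self-contained sketch of the resulting wire format (the two functions are copied from above; the sample dict is made up):

def _string_escape(text):
    text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
    return text.replace('\0', '\\0')

def encodeextra(d):
    items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
    return "\0".join(items)

print(repr(encodeextra({'branch': 'stable', 'close': '1'})))
# 'branch:stable\x00close:1' - sorted key:value pairs joined by NUL bytes
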
def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')

class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''
    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end
    def tell(self):
        return self.offset
    def flush(self):
        pass
    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, "".join(self.data))
            del self.data[1:]
            s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

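A toy run of appender (Python 2 semantics, like the surrounding code; the _vfs shim and temp file are illustrative, not Mercurial API). It shows a read spanning the bytes already on disk and the buffered, not-yet-written data:

import os, tempfile

class _vfs(object):
    # minimal stand-in for Mercurial's vfs: a callable opener plus fstat()
    def __call__(self, name, mode='r'):
        return open(name, mode)
    def fstat(self, fp):
        return os.fstat(fp.fileno())

fd, path = tempfile.mkstemp()
os.write(fd, 'ondisk')                   # 6 bytes already committed to disk
os.close(fd)

buf = []
fp = appender(_vfs(), path, 'rb+', buf)  # appender as defined above
fp.seek(0, 2)                            # virtual end: disk size plus buffer
fp.write('pending')                      # lands in buf, not on disk
fp.seek(3)
print(fp.read(7))                        # 'iskpend' - crosses the boundary
fp.close()
os.remove(path)
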
def _divertopener(opener, target):
    """build an opener that writes in 'target.a' instead of 'target'"""
    def _divert(name, mode='r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return opener(name + ".a", mode)
    return _divert

def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""
    def _delay(name, mode='r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return appender(opener, name, mode, buf)
    return _delay

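How the diverting wrapper routes file names, with a dummy opener (names are illustrative); _delayopener intercepts only the target the same way, returning an appender instead:

def dummyopener(name, mode='r', checkambig=False):
    return (name, mode)

divert = _divertopener(dummyopener, '00changelog.i')
print(divert('00changelog.i', 'a'))   # ('00changelog.i.a', 'a') - redirected
print(divert('00changelog.d', 'a'))   # ('00changelog.d', 'a')   - untouched
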
_changelogrevision = collections.namedtuple(u'changelogrevision',
                                            (u'manifest', u'user', u'date',
                                             u'files', u'description',
                                             u'extra'))

class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        u'_offsets',
        u'_text',
    )

    def __new__(cls, text):
        if not text:
            return _changelogrevision(
                manifest=nullid,
                user='',
                date=(0, 0),
                files=[],
                description='',
                extra=_defaultextra,
            )

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index('\n')
        nl2 = text.index('\n', nl1 + 1)
        nl3 = text.index('\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1:nl3 + 2] == '\n':
            doublenl = nl3
        else:
            doublenl = text.index('\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text

        return self

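The text layout documented in __new__, built by hand (all values are made up); the same index arithmetic recovers the pieces:

text = ('0123456789abcdef0123456789abcdef01234567\n'   # manifest node, hex
        'Jane Doe <jane@example.com>\n'                 # user
        '1500000000 0 branch:stable\n'                  # time tz extra
        'a.txt\n'                                       # files...
        'b.txt\n'
        '\n'
        'commit message')
nl1 = text.index('\n')
nl2 = text.index('\n', nl1 + 1)
nl3 = text.index('\n', nl2 + 1)
doublenl = text.index('\n\n', nl3 + 1)
print(text[nl3 + 1:doublenl].split('\n'))   # ['a.txt', 'b.txt']
print(text[doublenl + 2:])                  # 'commit message'
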
    @property
    def manifest(self):
        return bin(self._text[0:self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1:off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        return dateextra.split(' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        fields = dateextra.split(' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def files(self):
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1:off[3]].split('\n')

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2:])

class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hook processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists('00changelog.i.a'):
            indexfile = '00changelog.i.a'
        else:
            indexfile = '00changelog.i'

-        revlog.revlog.__init__(self, opener, indexfile, checkambig=True)
+        datafile = '00changelog.d'
+        revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
+                               checkambig=True)

        if self._initempty:
            # changelogs don't benefit from generaldelta
            self.version &= ~revlog.REVLOGGENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self.storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()

    def tip(self):
        """filtered version of revlog.tip"""
        for i in xrange(len(self) - 1, -2, -1):
            if i not in self.filteredrevs:
                return self.node(i)

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return (0 <= rev < len(self)
                and rev not in self.filteredrevs)

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if len(self.filteredrevs) == 0:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

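The pattern in these overrides (and the lookup methods below) is uniform: a revision listed in filteredrevs behaves as if absent. A toy version of the iteration side:

filteredrevs = frozenset([1, 3])
visible = [r for r in range(5) if r not in filteredrevs]
print(visible)   # [0, 2, 4] - revisions 1 and 3 are hidden
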
    @util.propertycache
    def nodemap(self):
        # XXX need filtering too
        self.rev(self.node(0))
        return self._nodecache

    def reachableroots(self, minroot, heads, roots, includepath=False):
        return self.index.reachableroots2(minroot, heads, roots, includepath)

    def headrevs(self):
        if self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        return super(changelog, self).headrevs()

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(hex(node), self.indexfile,
                                            _('filtered node'))
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        "delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self.indexfile + '.a'):
                    self._realopener.unlink(self.indexfile + '.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(self._realopener, self.indexfile,
                                           self._delaybuf)
        self._delayed = True
        tr.addpending('cl-%i' % id(self), self._writepending)
        tr.addfinalize('cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        "finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + ".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a', checkambig=True)
            fp.write("".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self.checkinlinesize(tr)

    def _writepending(self, tr):
        "create a file containing the unfinalized state for pretxnchangegroup"
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + ".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, "w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write("".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

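A summary of the delayed-write lifecycle implemented by the three methods above (a reading of the code, not extra API):

# 1. delayupdate(tr): on an empty changelog, divert writes to
#    '00changelog.i.a'; otherwise buffer index writes in self._delaybuf.
# 2. _writepending(tr): copy the on-disk index plus the buffered bytes into
#    '00changelog.i.a' so pretxnchangegroup hooks can read the pending
#    state, then switch to divert mode.
# 3. _finalize(tr): rename '00changelog.i.a' over '00changelog.i' (divert
#    mode) or append the buffer to '00changelog.i' (delay mode).
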
    def checkinlinesize(self, tr, fp=None):
        if not self._delayed:
            revlog.revlog.checkinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

        - manifest node in binary
        - author/user as a localstr
        - date as a 2-tuple of (time, timezone)
        - list of files
        - commit message as a localstr
        - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        c = changelogrevision(self.revision(node))
        return (
            c.manifest,
            c.user,
            c.date,
            c.files,
            c.description,
            c.extra
        )

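A hedged usage sketch of the 6-tuple (repo and node are placeholders, not defined here):

manifest, user, date, files, desc, extra = repo.changelog.read(node)
print(files)             # e.g. ['a.txt', 'b.txt']
print(extra['branch'])   # 'default' unless the changeset names another branch
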
    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        return changelogrevision(self.revision(nodeorrev))

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index("\n\n")
        l = text[:last].split('\n')
        return l[3:]

    def add(self, manifest, files, desc, transaction, p1, p2,
            user, date=None, extra=None):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.RevlogError(_("empty username"))
        if "\n" in user:
            raise error.RevlogError(_("username %s contains a newline")
                                    % repr(user))

        desc = stripdesc(desc)

        if date:
            parseddate = "%d %d" % util.parsedate(date)
        else:
            parseddate = "%d %d" % util.makedate()
        if extra:
            branch = extra.get("branch")
            if branch in ("default", ""):
                del extra["branch"]
            elif branch in (".", "null", "tip"):
                raise error.RevlogError(_('the name \'%s\' is reserved')
                                        % branch)
        if extra:
            extra = encodeextra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, len(self), p1, p2)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get("branch")), 'close' in extra

    def _addrevision(self, node, rawtext, transaction, *args, **kwargs):
        # overlay over the standard revlog._addrevision to track the new
        # revision on the transaction.
        rev = len(self)
        node = super(changelog, self)._addrevision(node, rawtext, transaction,
                                                   *args, **kwargs)
        revs = transaction.changes.get('revs')
        if revs is not None:
            revs.add(rev)
        return node
mercurial/revlog.py
@@ -1,2150 +1,2150 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    pycompat,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

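A sketch of what a registration could look like, following the docstring's invariants (the processor functions here are hypothetical, and the call itself is left commented out since it mutates module state):

def _readext(self, rawtext):
    return rawtext, False   # text shown to the user; don't hash it

def _writeext(self, text):
    return text, False      # rawtext to store; hash the user-visible text

def _rawext(self, rawtext):
    return False            # same boolean as the 'write' transform

# addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
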
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

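The 48-bit offset / 16-bit flags packing these helpers implement, round-tripped by hand:

q = (1234 << 16) | (1 << 15)   # offset_type(1234, REVIDX_ISCENSORED)
print(q >> 16)                 # 1234  (getoffset)
print(q & 0xFFFF)              # 32768 (gettype: the censored flag bit)
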
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

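A standalone restatement of the scheme (the fast path above is equivalent because nullid, being all zero bytes, always sorts first):

import hashlib

nullid = b'\x00' * 20

def nodehash(text, p1, p2):
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

print(len(nodehash(b'contents', nullid, nullid)))   # 20 - a sha1 digest
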
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

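A quick sanity check of the two record layouts described above:

import struct
print(struct.calcsize(">Qiiiiii20s12x"))   # 64 - RevlogNG entry size
print(struct.calcsize(">4l20s20s20s"))     # 76 - old v0 entry size
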
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
-    def __init__(self, opener, indexfile, checkambig=False):
+    def __init__(self, opener, indexfile, datafile=None, checkambig=False):
256 """
256 """
257 create a revlog object
257 create a revlog object
258
258
259 opener is a function that abstracts the file opening operation
259 opener is a function that abstracts the file opening operation
260 and can be used to implement COW semantics or the like.
260 and can be used to implement COW semantics or the like.
261 """
261 """
262 self.indexfile = indexfile
262 self.indexfile = indexfile
-        self.datafile = indexfile[:-2] + ".d"
+        self.datafile = datafile or (indexfile[:-2] + ".d")
264 self.opener = opener
264 self.opener = opener
265 # When True, indexfile is opened with checkambig=True at writing, to
265 # When True, indexfile is opened with checkambig=True at writing, to
266 # avoid file stat ambiguity.
266 # avoid file stat ambiguity.
267 self._checkambig = checkambig
267 self._checkambig = checkambig
268 # 3-tuple of (node, rev, text) for a raw revision.
268 # 3-tuple of (node, rev, text) for a raw revision.
269 self._cache = None
269 self._cache = None
270 # Maps rev to chain base rev.
270 # Maps rev to chain base rev.
271 self._chainbasecache = util.lrucachedict(100)
271 self._chainbasecache = util.lrucachedict(100)
272 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
272 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
273 self._chunkcache = (0, '')
273 self._chunkcache = (0, '')
274 # How much data to read and cache into the raw revlog data cache.
274 # How much data to read and cache into the raw revlog data cache.
275 self._chunkcachesize = 65536
275 self._chunkcachesize = 65536
276 self._maxchainlen = None
276 self._maxchainlen = None
277 self._aggressivemergedeltas = False
277 self._aggressivemergedeltas = False
278 self.index = []
278 self.index = []
279 # Mapping of partial identifiers to full nodes.
279 # Mapping of partial identifiers to full nodes.
280 self._pcache = {}
280 self._pcache = {}
281 # Mapping of revision integer to full node.
281 # Mapping of revision integer to full node.
282 self._nodecache = {nullid: nullrev}
282 self._nodecache = {nullid: nullrev}
283 self._nodepos = None
283 self._nodepos = None
284 self._compengine = 'zlib'
284 self._compengine = 'zlib'
285
285
286 v = REVLOG_DEFAULT_VERSION
286 v = REVLOG_DEFAULT_VERSION
287 opts = getattr(opener, 'options', None)
287 opts = getattr(opener, 'options', None)
288 if opts is not None:
288 if opts is not None:
289 if 'revlogv1' in opts:
289 if 'revlogv1' in opts:
290 if 'generaldelta' in opts:
290 if 'generaldelta' in opts:
291 v |= REVLOGGENERALDELTA
291 v |= REVLOGGENERALDELTA
292 else:
292 else:
293 v = 0
293 v = 0
294 if 'chunkcachesize' in opts:
294 if 'chunkcachesize' in opts:
295 self._chunkcachesize = opts['chunkcachesize']
295 self._chunkcachesize = opts['chunkcachesize']
296 if 'maxchainlen' in opts:
296 if 'maxchainlen' in opts:
297 self._maxchainlen = opts['maxchainlen']
297 self._maxchainlen = opts['maxchainlen']
298 if 'aggressivemergedeltas' in opts:
298 if 'aggressivemergedeltas' in opts:
299 self._aggressivemergedeltas = opts['aggressivemergedeltas']
299 self._aggressivemergedeltas = opts['aggressivemergedeltas']
300 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
300 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
301 if 'compengine' in opts:
301 if 'compengine' in opts:
302 self._compengine = opts['compengine']
302 self._compengine = opts['compengine']
303
303
304 if self._chunkcachesize <= 0:
304 if self._chunkcachesize <= 0:
305 raise RevlogError(_('revlog chunk cache size %r is not greater '
305 raise RevlogError(_('revlog chunk cache size %r is not greater '
306 'than 0') % self._chunkcachesize)
306 'than 0') % self._chunkcachesize)
307 elif self._chunkcachesize & (self._chunkcachesize - 1):
307 elif self._chunkcachesize & (self._chunkcachesize - 1):
308 raise RevlogError(_('revlog chunk cache size %r is not a power '
308 raise RevlogError(_('revlog chunk cache size %r is not a power '
309 'of 2') % self._chunkcachesize)
309 'of 2') % self._chunkcachesize)
310
310
311 indexdata = ''
311 indexdata = ''
312 self._initempty = True
312 self._initempty = True
313 try:
313 try:
314 f = self.opener(self.indexfile)
314 f = self.opener(self.indexfile)
315 indexdata = f.read()
315 indexdata = f.read()
316 f.close()
316 f.close()
317 if len(indexdata) > 0:
317 if len(indexdata) > 0:
318 v = struct.unpack(versionformat, indexdata[:4])[0]
318 v = struct.unpack(versionformat, indexdata[:4])[0]
319 self._initempty = False
319 self._initempty = False
320 except IOError as inst:
320 except IOError as inst:
321 if inst.errno != errno.ENOENT:
321 if inst.errno != errno.ENOENT:
322 raise
322 raise
323
323
324 self.version = v
324 self.version = v
325 self._inline = v & REVLOGNGINLINEDATA
325 self._inline = v & REVLOGNGINLINEDATA
326 self._generaldelta = v & REVLOGGENERALDELTA
326 self._generaldelta = v & REVLOGGENERALDELTA
327 flags = v & ~0xFFFF
327 flags = v & ~0xFFFF
328 fmt = v & 0xFFFF
328 fmt = v & 0xFFFF
329 if fmt == REVLOGV0 and flags:
329 if fmt == REVLOGV0 and flags:
330 raise RevlogError(_("index %s unknown flags %#04x for format v0")
330 raise RevlogError(_("index %s unknown flags %#04x for format v0")
331 % (self.indexfile, flags >> 16))
331 % (self.indexfile, flags >> 16))
332 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
332 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
333 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
333 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
334 % (self.indexfile, flags >> 16))
334 % (self.indexfile, flags >> 16))
335 elif fmt > REVLOGNG:
335 elif fmt > REVLOGNG:
336 raise RevlogError(_("index %s unknown format %d")
336 raise RevlogError(_("index %s unknown format %d")
337 % (self.indexfile, fmt))
337 % (self.indexfile, fmt))
338
338
339 self.storedeltachains = True
339 self.storedeltachains = True
340
340
341 self._io = revlogio()
341 self._io = revlogio()
342 if self.version == REVLOGV0:
342 if self.version == REVLOGV0:
343 self._io = revlogoldio()
343 self._io = revlogoldio()
344 try:
344 try:
345 d = self._io.parseindex(indexdata, self._inline)
345 d = self._io.parseindex(indexdata, self._inline)
346 except (ValueError, IndexError):
346 except (ValueError, IndexError):
347 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
347 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
348 self.index, nodemap, self._chunkcache = d
348 self.index, nodemap, self._chunkcache = d
349 if nodemap is not None:
349 if nodemap is not None:
350 self.nodemap = self._nodecache = nodemap
350 self.nodemap = self._nodecache = nodemap
351 if not self._chunkcache:
351 if not self._chunkcache:
352 self._chunkclear()
352 self._chunkclear()
353 # revnum -> (chain-length, sum-delta-length)
353 # revnum -> (chain-length, sum-delta-length)
354 self._chaininfocache = {}
354 self._chaininfocache = {}
355 # revlog header -> revlog compressor
355 # revlog header -> revlog compressor
356 self._decompressors = {}
356 self._decompressors = {}
357
357
358 @util.propertycache
358 @util.propertycache
359 def _compressor(self):
359 def _compressor(self):
360 return util.compengines[self._compengine].revlogcompressor()
360 return util.compengines[self._compengine].revlogcompressor()
361
361
362 def tip(self):
362 def tip(self):
363 return self.node(len(self.index) - 2)
363 return self.node(len(self.index) - 2)
364 def __contains__(self, rev):
364 def __contains__(self, rev):
365 return 0 <= rev < len(self)
365 return 0 <= rev < len(self)
366 def __len__(self):
366 def __len__(self):
367 return len(self.index) - 1
367 return len(self.index) - 1
368 def __iter__(self):
368 def __iter__(self):
369 return iter(xrange(len(self)))
369 return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

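    # Illustrative sketch (not part of the original module): ``revs`` can
    # walk in either direction, assuming ``rl`` is an open revlog with at
    # least three revisions:
    #
    #   list(rl.revs(start=0, stop=2)) # -> [0, 1, 2] (stop is inclusive)
    #   list(rl.revs(start=2, stop=0)) # -> [2, 1, 0] (descends when
    #                                  #    start > stop)
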
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

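    # Illustrative sketch (not part of the original module): ``rev`` and
    # ``node`` are inverses of each other, assuming ``rl`` is a non-empty
    # revlog:
    #
    #   tipnode = rl.tip()
    #   r = rl.rev(tipnode)
    #   assert rl.node(r) == tipnode
    #   assert rl.hasnode(tipnode)
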
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

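    # Illustrative sketch (not part of the original module): the packing of
    # the first index entry means an (offset, flags) pair hypothetically
    # recombines and splits as follows:
    #
    #   entry0 = (offset << 16) | flags   # how the 8-byte field is packed
    #   assert (entry0 >> 16) == offset   # what start() extracts
    #   assert (entry0 & 0xFFFF) == flags # what flags() extracts
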
    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        return self.index[rev][5:7]

    def node(self, rev):
        return self.index[rev][7]

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

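    # Illustrative sketch (not part of the original module): the shape of a
    # delta chain, assuming ``rl`` is an open revlog and ``r`` a valid
    # revision:
    #
    #   chain, stopped = rl._deltachain(r)
    #   assert not stopped            # no stoprev given, so we hit the base
    #   assert chain == sorted(chain) # ascending order, base first
    #   assert chain[-1] == r         # the requested rev closes the chain
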
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

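    # Illustrative sketch (not part of the original module): in revset terms
    # ``findmissing(common, heads)`` computes (::heads) - (::common);
    # assuming ``rl`` is an open revlog and ``c``, ``h`` are hypothetical
    # node IDs:
    #
    #   missing = rl.findmissing(common=[c], heads=[h])
    #   # every returned node is an ancestor of h but not of c, and the
    #   # list is sorted by ascending revision number
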
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

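    # Illustrative sketch (not part of the original module): ``nodesbetween``
    # bounds a DAG slice from both ends. Assuming ``rl`` is an open revlog
    # and node ``a`` is an ancestor of node ``b``:
    #
    #   nodes, outroots, outheads = rl.nodesbetween([a], [b])
    #   assert nodes[0] == a and nodes[-1] == b # both endpoints included
    #   assert outroots == [a] and outheads == [b]
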
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

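    # Illustrative sketch (not part of the original module): with no
    # arguments ``heads`` returns every childless node; with ``start`` it is
    # restricted to descendants of start. Assuming ``rl`` is a non-empty
    # revlog:
    #
    #   allheads = rl.heads()
    #   assert rl.tip() in allheads # the tip never has children
    #   assert rl.heads(start=rl.tip()) == [rl.tip()]
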
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

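    # Illustrative sketch (not part of the original module): node ``a`` is an
    # ancestor of node ``b`` exactly when ``a`` itself is one of the heads of
    # their common ancestors. Assuming ``rl`` is an open revlog:
    #
    #   assert rl.isancestor(a, b) == (a in rl.commonancestorsheads(a, b))
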
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                return partial
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

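    # Illustrative sketch (not part of the original module): ``lookup``
    # accepts several identifier forms for the same node. Assuming ``rl`` is
    # a non-empty revlog:
    #
    #   n = rl.tip()
    #   assert rl.lookup(rl.rev(n)) == n      # integer revision number
    #   assert rl.lookup(str(rl.rev(n))) == n # str(revision number)
    #   assert rl.lookup(hex(n)) == n         # full 40-char hex nodeid
    #   assert rl.lookup(hex(n)[:12]) == n    # hex prefix, if unambiguous
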
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

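    # Illustrative sketch (not part of the original module): the window
    # arithmetic above rounds reads to cachesize-aligned boundaries. For
    # example, with a hypothetical cachesize = 65536, offset = 70000 and
    # length = 100:
    #
    #   realoffset = 70000 & ~(65536 - 1)                     # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~(65536 - 1)) - 65536
    #                                                         # -> 65536
    #   # bytes [65536, 131072) are read and cached, and the caller's
    #   # [70000, 70100) slice is returned as a buffer into that window
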
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        decomp = self.decompress
        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

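    # Illustrative sketch (not part of the original module): the delta parent
    # depends on the storage layout. Assuming ``rl`` is an open revlog and
    # ``r`` is a valid revision:
    #
    #   p = rl.deltaparent(r)
    #   # full snapshot (base == rev): p is nullrev
    #   # generaldelta layout: p is the base recorded in the index, which
    #   #   may be any earlier revision
    #   # classic layout: p is always r - 1
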
1250 def revdiff(self, rev1, rev2):
1250 def revdiff(self, rev1, rev2):
1251 """return or calculate a delta between two revisions
1251 """return or calculate a delta between two revisions
1252
1252
1253 The delta calculated is in binary form and is intended to be written to
1253 The delta calculated is in binary form and is intended to be written to
1254 revlog data directly. So this function needs raw revision data.
1254 revlog data directly. So this function needs raw revision data.
1255 """
1255 """
1256 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1256 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1257 return bytes(self._chunk(rev2))
1257 return bytes(self._chunk(rev2))
1258
1258
1259 return mdiff.textdiff(self.revision(rev1, raw=True),
1259 return mdiff.textdiff(self.revision(rev1, raw=True),
1260 self.revision(rev2, raw=True))
1260 self.revision(rev2, raw=True))
1261
1261
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

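    # Minimal reconstruction sketch (assumes a toy 'chunks' list; this
    # mirrors the logic above rather than adding to it): chunks[0] is the
    # chain's full snapshot and the remaining chunks are deltas applied in
    # order.
    #
    #   rawtext = bytes(chunks[0])
    #   rawtext = mdiff.patches(rawtext, chunks[1:])
    #
    # The (node, rev, rawtext) cache then lets an immediate re-read of the
    # same node skip the chain walk entirely.
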
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

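    # Hedged sketch of a flag processor (the flag name and helpers below are
    # hypothetical, not part of this file): processors are registered as a
    # 3-tuple of (readtransform, writetransform, rawtransform) keyed by the
    # flag bit, matching the unpacking above.
    #
    #   def _readzip(rl, text):
    #       return zlib.decompress(text), True    # (text, validatehash)
    #   def _writezip(rl, text):
    #       return zlib.compress(text), False
    #   def _rawzip(rl, text):
    #       return False                          # stored form != hashed form
    #   _flagprocessors[REVIDX_EXAMPLE_ZIPPED] = (_readzip, _writezip,
    #                                             _rawzip)
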
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

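    # Layout note with a tiny sketch (the file naming follows the
    # indexfile/datafile attributes used above; treat this as an
    # illustration, not a spec): an inline revlog keeps each data chunk
    # right after its index entry in the '.i' file; once the data outgrows
    # _maxinline, the chunks are copied out to the '.d' file and the index
    # is rewritten without the inline flag.
    #
    #   before: foo.i = [entry0][data0][entry1][data1]...
    #   after:  foo.i = [entry0][entry1]...   foo.d = [data0][data1]...
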
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such
            case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta)

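    # Hedged usage sketch ('repo', 'tr', 'linkrev', and the parent nodes are
    # assumptions for illustration): adding one revision inside an open
    # transaction and getting back its nodeid.
    #
    #   with repo.transaction('example') as tr:
    #       node = rlog.addrevision("contents\n", tr, linkrev,
    #                               p1node, p2node)
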
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

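    # Header dispatch in miniature, assuming the engine declines to compress
    # (returns a falsy value) because compression would not shrink the
    # chunk; 'x' is the zlib header byte, 'u' marks stored-uncompressed, and
    # '\0' marks raw data whose first byte happens to be NUL:
    #
    #   compress('')         -> ('', '')          # empty fast path
    #   compress(b'\0abc')   -> ('', b'\0abc')    # raw, self-marking
    #   compress(b'abc')     -> ('u', b'abc')     # uncompressible, tagged
    #   decompress(b'uabc')  -> buffer view of b'abc'
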
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

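    # Worked example of the bounds above (numbers invented): for a fulltext
    # of textlen = 1000 bytes, a candidate delta is rejected when the read
    # span from its chain base exceeds 4000 bytes (dist > textlen * 4),
    # when the delta itself is larger than the fulltext (l > textlen), when
    # the chain's summed compressed deltas exceed 2000 bytes
    # (compresseddeltalen > textlen * 2), or when the chain would grow past
    # _maxchainlen.
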
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

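    # Delta-selection order in the method above, summarized (no new
    # behavior): 1) if a cached delta arrived with the data and
    # lazydeltabase applies, test it first; 2) under generaldelta, test the
    # parent(s) -- both parents only when aggressivemergedeltas is set,
    # otherwise just the closer one; 3) fall back to the previous revision;
    # 4) if nothing passes _isgooddelta, store a full compressed snapshot.
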
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

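    # Hedged caller sketch (the changegroup object 'cg', 'cl', and 'tr' are
    # assumptions for illustration): linkmapper translates each chunk's
    # changeset node into the local changelog revision number.
    #
    #   def linkmapper(csnode):
    #       return cl.rev(csnode)   # 'cl' would be the local changelog
    #   added = rlog.addgroup(cg, linkmapper, tr)
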
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

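    # Worked example (invented; single head, linkrevs non-decreasing as the
    # changegroup ordering makes them): with revs 0..4 carrying linkrevs
    # [0, 2, 2, 3, 4] and minlink = 3, the walk pops rev 4 (linkrev 4) and
    # rev 3 (linkrev 3), then stops once no pending linkrev >= minlink
    # remains, returning strippoint = 3. brokenrevs collects revs above the
    # strippoint whose linkrevs are still < minlink, which can happen when
    # several branches interleave.
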
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

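    # Truncation offset arithmetic, spelled out (mirrors the branch above):
    # for a split revlog the data file is cut at start(rev) and the index
    # at rev * entrysize; for an inline revlog both live in one file, so
    # the cut point is start(rev) + rev * entrysize, since every revision
    # also carries an index entry before its data chunk.
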
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = {'always', 'samerevs', 'never'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
            Deltas will always be reused (if possible), even if the destination
            revlog would not select the same revisions for the delta. This is
            the fastest mode of operation.
        DELTAREUSESAMEREVS
            Deltas will be reused if the destination revlog would pick the same
            revisions for the delta. This mode strikes a balance between speed
            and optimization.
        DELTAREUSENEVER
            Deltas will never be reused. This is the slowest mode of execution.
            This mode can be used to recompute deltas (e.g. if the diff/delta
            algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        When unspecified, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
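
    # Hedged usage sketch ('srclog', 'dstlog', and 'tr' are assumptions for
    # illustration): recompute every delta while copying, e.g. after a
    # delta algorithm change.
    #
    #   srclog.clone(tr, dstlog, deltareuse=srclog.DELTAREUSENEVER)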