changegroup: introduce an explicit linear sorting...
Boris Feld
r40483:256b1f0c stable
@@ -1,1169 +1,1169 @@
# sqlitestore.py - Storage backend that uses SQLite
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""store repository data in SQLite (EXPERIMENTAL)

The sqlitestore extension enables the storage of repository data in SQLite.

This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
GUARANTEES. This means that repositories created with this extension may
only be usable with the exact version of this extension/Mercurial that was
used. The extension attempts to enforce this in order to prevent repository
corruption.

In addition, several features are not yet supported or have known bugs:

* Only some data is stored in SQLite. Changeset, manifest, and other repository
  data is not yet stored in SQLite.
* Transactions are not robust. If the process is aborted at the right time
  during transaction close/rollback, the repository could be in an inconsistent
  state. This problem will diminish once all repository data is tracked by
  SQLite.
* Bundle repositories do not work (the ability to use e.g.
  `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
  existing repository).
* Various other features don't work.

This extension should work for basic clone/pull, update, and commit workflows.
Some history rewriting operations may fail due to lack of support for bundle
repositories.

To use, activate the extension and set the ``storage.new-repo-backend`` config
option to ``sqlite`` to enable new repositories to use SQLite for storage.
"""

# To run the test suite with repos using SQLite by default, execute the
# following:
#
# HGREPOFEATURES="sqlitestore" run-tests.py \
#     --extra-config-opt extensions.sqlitestore= \
#     --extra-config-opt storage.new-repo-backend=sqlite

from __future__ import absolute_import

import hashlib
import sqlite3
import struct
import threading
import zlib

from mercurial.i18n import _
from mercurial.node import (
    nullid,
    nullrev,
    short,
)
from mercurial.thirdparty import (
    attr,
)
from mercurial import (
    ancestor,
    dagop,
    error,
    extensions,
    localrepo,
    mdiff,
    pycompat,
    registrar,
    repository,
    util,
    verify,
)
from mercurial.utils import (
    interfaceutil,
    storageutil,
)

try:
    from mercurial import zstd
    zstd.__version__
except ImportError:
    zstd = None

configtable = {}
configitem = registrar.configitem(configtable)

# experimental config: storage.sqlite.compression
configitem('storage', 'sqlite.compression',
           default='zstd' if zstd else 'zlib')
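
# For example, to force zlib even where zstd is available (a sketch; the
# recognized values mirror the COMPRESSION_* / REQUIREMENT_* names below):
#
#   [storage]
#   sqlite.compression = zlib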

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

REQUIREMENT = b'exp-sqlite-001'
REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'

CURRENT_SCHEMA_VERSION = 1

COMPRESSION_NONE = 1
COMPRESSION_ZSTD = 2
COMPRESSION_ZLIB = 3

FLAG_CENSORED = 1
FLAG_MISSING_P1 = 2
FLAG_MISSING_P2 = 4

CREATE_SCHEMA = [
    # Deltas are stored as content-indexed blobs.
    # compression column holds COMPRESSION_* constant for how the
    # delta is encoded.

    r'CREATE TABLE delta ('
    r'    id INTEGER PRIMARY KEY, '
    r'    compression INTEGER NOT NULL, '
    r'    hash BLOB UNIQUE ON CONFLICT ABORT, '
    r'    delta BLOB NOT NULL '
    r')',

    # Tracked paths are denormalized to integers to avoid redundant
    # storage of the path name.
    r'CREATE TABLE filepath ('
    r'    id INTEGER PRIMARY KEY, '
    r'    path BLOB NOT NULL '
    r')',

    r'CREATE UNIQUE INDEX filepath_path '
    r'    ON filepath (path)',

    # We have a single table for all file revision data.
    # Each file revision is uniquely described by a (path, rev) and
    # (path, node).
    #
    # Revision data is stored as a pointer to the delta producing this
    # revision and the file revision whose delta should be applied before
    # that one. One can reconstruct the delta chain by recursively following
    # the delta base revision pointers until one encounters NULL.
    #
    # flags column holds bitwise integer flags controlling storage options.
    # These flags are defined by the FLAG_* constants.
    r'CREATE TABLE fileindex ('
    r'    id INTEGER PRIMARY KEY, '
    r'    pathid INTEGER REFERENCES filepath(id), '
    r'    revnum INTEGER NOT NULL, '
    r'    p1rev INTEGER NOT NULL, '
    r'    p2rev INTEGER NOT NULL, '
    r'    linkrev INTEGER NOT NULL, '
    r'    flags INTEGER NOT NULL, '
    r'    deltaid INTEGER REFERENCES delta(id), '
    r'    deltabaseid INTEGER REFERENCES fileindex(id), '
    r'    node BLOB NOT NULL '
    r')',

    r'CREATE UNIQUE INDEX fileindex_pathrevnum '
    r'    ON fileindex (pathid, revnum)',

    r'CREATE UNIQUE INDEX fileindex_pathnode '
    r'    ON fileindex (pathid, node)',

    # Provide a view over all file data for convenience.
    r'CREATE VIEW filedata AS '
    r'SELECT '
    r'    fileindex.id AS id, '
    r'    filepath.id AS pathid, '
    r'    filepath.path AS path, '
    r'    fileindex.revnum AS revnum, '
    r'    fileindex.node AS node, '
    r'    fileindex.p1rev AS p1rev, '
    r'    fileindex.p2rev AS p2rev, '
    r'    fileindex.linkrev AS linkrev, '
    r'    fileindex.flags AS flags, '
    r'    fileindex.deltaid AS deltaid, '
    r'    fileindex.deltabaseid AS deltabaseid '
    r'FROM filepath, fileindex '
    r'WHERE fileindex.pathid=filepath.id',

    r'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
]
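
# To illustrate the chain encoding described in the fileindex comment above,
# hypothetical rows for three revisions of one path might look like:
#
#   id=1 revnum=0 deltaid=10 deltabaseid=NULL   (fulltext stored as delta 10)
#   id=2 revnum=1 deltaid=11 deltabaseid=1
#   id=3 revnum=2 deltaid=12 deltabaseid=2
#
# Reconstructing revnum=2 follows 3 -> 2 -> 1 until the NULL base, then
# applies deltas 11 and 12 on top of delta 10's content.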

def resolvedeltachain(db, pathid, node, revisioncache,
                      stoprids, zstddctx=None):
    """Resolve a delta chain for a file node."""

    # TODO the "not in ({stops})" here is possibly slowing down the query
    # because it needs to perform the lookup on every recursive invocation.
    # This could possibly be faster if we created a temporary query with
    # baseid "poisoned" to null and limited the recursive filter to
    # "is not null".
    res = db.execute(
        r'WITH RECURSIVE '
        r'    deltachain(deltaid, baseid) AS ('
        r'        SELECT deltaid, deltabaseid FROM fileindex '
        r'            WHERE pathid=? AND node=? '
        r'        UNION ALL '
        r'        SELECT fileindex.deltaid, deltabaseid '
        r'            FROM fileindex, deltachain '
        r'            WHERE '
        r'                fileindex.id=deltachain.baseid '
        r'                AND deltachain.baseid IS NOT NULL '
        r'                AND fileindex.id NOT IN ({stops}) '
        r'    ) '
        r'SELECT deltachain.baseid, compression, delta '
        r'FROM deltachain, delta '
        r'WHERE delta.id=deltachain.deltaid'.format(
            stops=r','.join([r'?'] * len(stoprids))),
        tuple([pathid, node] + list(stoprids.keys())))

    deltas = []
    lastdeltabaseid = None

    for deltabaseid, compression, delta in res:
        lastdeltabaseid = deltabaseid

        if compression == COMPRESSION_ZSTD:
            delta = zstddctx.decompress(delta)
        elif compression == COMPRESSION_NONE:
            delta = delta
        elif compression == COMPRESSION_ZLIB:
            delta = zlib.decompress(delta)
        else:
            raise SQLiteStoreError('unhandled compression type: %d' %
                                   compression)

        deltas.append(delta)

    if lastdeltabaseid in stoprids:
        basetext = revisioncache[stoprids[lastdeltabaseid]]
    else:
        basetext = deltas.pop()

    deltas.reverse()
    fulltext = mdiff.patches(basetext, deltas)

    # SQLite returns buffer instances for blob columns on Python 2. This
    # type can propagate through the delta application layer. Because
    # downstream callers assume revisions are bytes, cast as needed.
    if not isinstance(fulltext, bytes):
        fulltext = bytes(fulltext)

    return fulltext
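
# For example, censorrevision() below resolves a fulltext with an empty
# revision cache by passing the {-1: None} sentinel as stoprids:
#
#   fulltext = resolvedeltachain(self._db, pathid, node, {}, {-1: None},
#                                zstddctx=self._dctx)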

def insertdelta(db, compression, hash, delta):
    try:
        return db.execute(
            r'INSERT INTO delta (compression, hash, delta) '
            r'VALUES (?, ?, ?)',
            (compression, hash, delta)).lastrowid
    except sqlite3.IntegrityError:
        return db.execute(
            r'SELECT id FROM delta WHERE hash=?',
            (hash,)).fetchone()[0]

class SQLiteStoreError(error.StorageError):
    pass

@attr.s
class revisionentry(object):
    rid = attr.ib()
    rev = attr.ib()
    node = attr.ib()
    p1rev = attr.ib()
    p2rev = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    linkrev = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class sqliterevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class sqliteproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

@interfaceutil.implementer(repository.ifilestorage)
class sqlitefilestore(object):
    """Implements storage for an individual tracked path."""

    def __init__(self, db, path, compression):
        self._db = db
        self._path = path

        self._pathid = None

        # revnum -> node
        self._revtonode = {}
        # node -> revnum
        self._nodetorev = {}
        # node -> data structure
        self._revisions = {}

        self._revisioncache = util.lrucachedict(10)

        self._compengine = compression

        if compression == 'zstd':
            self._cctx = zstd.ZstdCompressor(level=3)
            self._dctx = zstd.ZstdDecompressor()
        else:
            self._cctx = None
            self._dctx = None

        self._refreshindex()

    def _refreshindex(self):
        self._revtonode = {}
        self._nodetorev = {}
        self._revisions = {}

        res = list(self._db.execute(
            r'SELECT id FROM filepath WHERE path=?', (self._path,)))

        if not res:
            self._pathid = None
            return

        self._pathid = res[0][0]

        res = self._db.execute(
            r'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
            r'FROM fileindex '
            r'WHERE pathid=? '
            r'ORDER BY revnum ASC',
            (self._pathid,))

        for i, row in enumerate(res):
            rid, rev, node, p1rev, p2rev, linkrev, flags = row

            if i != rev:
                raise SQLiteStoreError(_('sqlite database has inconsistent '
                                         'revision numbers'))

            if p1rev == nullrev:
                p1node = nullid
            else:
                p1node = self._revtonode[p1rev]

            if p2rev == nullrev:
                p2node = nullid
            else:
                p2node = self._revtonode[p2rev]

            entry = revisionentry(
                rid=rid,
                rev=rev,
                node=node,
                p1rev=p1rev,
                p2rev=p2rev,
                p1node=p1node,
                p2node=p2node,
                linkrev=linkrev,
                flags=flags)

            self._revtonode[rev] = node
            self._nodetorev[node] = rev
            self._revisions[node] = entry

    # Start of ifileindex interface.

    def __len__(self):
        return len(self._revisions)

    def __iter__(self):
        return iter(pycompat.xrange(len(self._revisions)))

    def hasnode(self, node):
        if node == nullid:
            return False

        return node in self._nodetorev

    def revs(self, start=0, stop=None):
        return storageutil.iterrevs(len(self._revisions), start=start,
                                    stop=stop)

    def parents(self, node):
        if node == nullid:
            return nullid, nullid

        if node not in self._revisions:
            raise error.LookupError(node, self._path, _('no node'))

        entry = self._revisions[node]
        return entry.p1node, entry.p2node

    def parentrevs(self, rev):
        if rev == nullrev:
            return nullrev, nullrev

        if rev not in self._revtonode:
            raise IndexError(rev)

        entry = self._revisions[self._revtonode[rev]]
        return entry.p1rev, entry.p2rev

    def rev(self, node):
        if node == nullid:
            return nullrev

        if node not in self._nodetorev:
            raise error.LookupError(node, self._path, _('no node'))

        return self._nodetorev[node]

    def node(self, rev):
        if rev == nullrev:
            return nullid

        if rev not in self._revtonode:
            raise IndexError(rev)

        return self._revtonode[rev]

    def lookup(self, node):
        return storageutil.fileidlookup(self, node, self._path)

    def linkrev(self, rev):
        if rev == nullrev:
            return nullrev

        if rev not in self._revtonode:
            raise IndexError(rev)

        entry = self._revisions[self._revtonode[rev]]
        return entry.linkrev

    def iscensored(self, rev):
        if rev == nullrev:
            return False

        if rev not in self._revtonode:
            raise IndexError(rev)

        return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED

    def commonancestorsheads(self, node1, node2):
        rev1 = self.rev(node1)
        rev2 = self.rev(node2)

        ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
        return pycompat.maplist(self.node, ancestors)

    def descendants(self, revs):
        # TODO we could implement this using a recursive SQL query, which
        # might be faster.
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def heads(self, start=None, stop=None):
        if start is None and stop is None:
            if not len(self):
                return [nullid]

        startrev = self.rev(start) if start is not None else nullrev
        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(self.revs, self.parentrevs,
                                    startrev=startrev, stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        rev = self.rev(node)

        res = self._db.execute(
            r'SELECT'
            r'    node '
            r'    FROM filedata '
            r'    WHERE path=? AND (p1rev=? OR p2rev=?) '
            r'    ORDER BY revnum ASC',
            (self._path, rev, rev))

        return [row[0] for row in res]

    # End of ifileindex interface.

    # Start of ifiledata interface.

    def size(self, rev):
        if rev == nullrev:
            return 0

        if rev not in self._revtonode:
            raise IndexError(rev)

        node = self._revtonode[rev]

        if self.renamed(node):
            return len(self.read(node))

        return len(self.revision(node))

    def revision(self, node, raw=False, _verifyhash=True):
        if node in (nullid, nullrev):
            return b''

        if isinstance(node, int):
            node = self.node(node)

        if node not in self._nodetorev:
            raise error.LookupError(node, self._path, _('no node'))

        if node in self._revisioncache:
            return self._revisioncache[node]

        # Because we have a fulltext revision cache, we are able to
        # short-circuit delta chain traversal and decompression as soon as
        # we encounter a revision in the cache.

        stoprids = {self._revisions[n].rid: n
                    for n in self._revisioncache}

        if not stoprids:
            stoprids[-1] = None
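            # (A sentinel: -1 should never match a real fileindex id, so
            # with an empty cache the query below walks the full chain to
            # its NULL base.)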

        fulltext = resolvedeltachain(self._db, self._pathid, node,
                                     self._revisioncache, stoprids,
                                     zstddctx=self._dctx)

        # Don't verify hashes if parent nodes were rewritten, as the hash
        # wouldn't verify.
        if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
            _verifyhash = False

        if _verifyhash:
            self._checkhash(fulltext, node)
            self._revisioncache[node] = fulltext

        return fulltext

    def read(self, node):
        return storageutil.filtermetadata(self.revision(node))

    def renamed(self, node):
        return storageutil.filerevisioncopied(self, node)

    def cmp(self, node, fulltext):
        return not storageutil.filedataequivalent(self, node, fulltext)

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False):
-        if nodesorder not in ('nodes', 'storage', None):
+        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        nodes = [n for n in nodes if n != nullid]

        if not nodes:
            return

        # TODO perform in a single query.
        res = self._db.execute(
            r'SELECT revnum, deltaid FROM fileindex '
            r'WHERE pathid=? '
            r'    AND node in (%s)' % (r','.join([r'?'] * len(nodes))),
            tuple([self._pathid] + nodes))

        deltabases = {}

        for rev, deltaid in res:
            res = self._db.execute(
                r'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
                (self._pathid, deltaid))
            deltabases[rev] = res.fetchone()[0]

        # TODO define revdifffn so we can use delta from storage.
        for delta in storageutil.emitrevisions(
            self, nodes, nodesorder, sqliterevisiondelta,
            deltaparentfn=deltabases.__getitem__,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltaprevious=deltaprevious):

            yield delta

    # End of ifiledata interface.

    # Start of ifilemutation interface.

    def add(self, filedata, meta, transaction, linkrev, p1, p2):
        if meta or filedata.startswith(b'\x01\n'):
            filedata = storageutil.packmeta(meta, filedata)

        return self.addrevision(filedata, transaction, linkrev, p1, p2)

    def addrevision(self, revisiondata, transaction, linkrev, p1, p2, node=None,
                    flags=0, cachedelta=None):
        if flags:
            raise SQLiteStoreError(_('flags not supported on revisions'))

        validatehash = node is not None
        node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)

        if validatehash:
            self._checkhash(revisiondata, node, p1, p2)

        if node in self._nodetorev:
            return node

        node = self._addrawrevision(node, revisiondata, transaction, linkrev,
                                    p1, p2)

        self._revisioncache[node] = revisiondata
        return node

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
                 maybemissingparents=False):
        nodes = []

        for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
            storeflags = 0

            if wireflags & repository.REVISION_FLAG_CENSORED:
                storeflags |= FLAG_CENSORED

            if wireflags & ~repository.REVISION_FLAG_CENSORED:
                raise SQLiteStoreError('unhandled revision flag')

            if maybemissingparents:
                if p1 != nullid and not self.hasnode(p1):
                    p1 = nullid
                    storeflags |= FLAG_MISSING_P1

                if p2 != nullid and not self.hasnode(p2):
                    p2 = nullid
                    storeflags |= FLAG_MISSING_P2

            baserev = self.rev(deltabase)

            # If base is censored, delta must be full replacement in a single
            # patch operation.
            if baserev != nullrev and self.iscensored(baserev):
                hlen = struct.calcsize('>lll')
                oldlen = len(self.revision(deltabase, raw=True,
                                           _verifyhash=False))
                newlen = len(delta) - hlen

                if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                    raise error.CensoredBaseError(self._path,
                                                  deltabase)

            if (not (storeflags & FLAG_CENSORED)
                and storageutil.deltaiscensored(
                    delta, baserev, lambda x: len(self.revision(x, raw=True)))):
                storeflags |= FLAG_CENSORED

            linkrev = linkmapper(linknode)

            nodes.append(node)

            if node in self._revisions:
                # Possibly reset parents to make them proper.
                entry = self._revisions[node]

                if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
                    entry.p1node = p1
                    entry.p1rev = self._nodetorev[p1]
                    entry.flags &= ~FLAG_MISSING_P1

                    self._db.execute(
                        r'UPDATE fileindex SET p1rev=?, flags=? '
                        r'WHERE id=?',
                        (self._nodetorev[p1], entry.flags, entry.rid))

                if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
                    entry.p2node = p2
                    entry.p2rev = self._nodetorev[p2]
                    entry.flags &= ~FLAG_MISSING_P2

                    self._db.execute(
                        r'UPDATE fileindex SET p2rev=?, flags=? '
                        r'WHERE id=?',
                        (self._nodetorev[p2], entry.flags, entry.rid))
694
694
695 continue
695 continue
696
696
697 if deltabase == nullid:
697 if deltabase == nullid:
698 text = mdiff.patch(b'', delta)
698 text = mdiff.patch(b'', delta)
699 storedelta = None
699 storedelta = None
700 else:
700 else:
701 text = None
701 text = None
702 storedelta = (deltabase, delta)
702 storedelta = (deltabase, delta)
703
703
704 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
704 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
705 storedelta=storedelta, flags=storeflags)
705 storedelta=storedelta, flags=storeflags)
706
706
707 if addrevisioncb:
707 if addrevisioncb:
708 addrevisioncb(self, node)
708 addrevisioncb(self, node)
709
709
710 return nodes
710 return nodes
711
711
712 def censorrevision(self, tr, censornode, tombstone=b''):
712 def censorrevision(self, tr, censornode, tombstone=b''):
713 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
713 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
714
714
715 # This restriction is cargo culted from revlogs and makes no sense for
715 # This restriction is cargo culted from revlogs and makes no sense for
716 # SQLite, since columns can be resized at will.
716 # SQLite, since columns can be resized at will.
717 if len(tombstone) > len(self.revision(censornode, raw=True)):
717 if len(tombstone) > len(self.revision(censornode, raw=True)):
718 raise error.Abort(_('censor tombstone must be no longer than '
718 raise error.Abort(_('censor tombstone must be no longer than '
719 'censored data'))
719 'censored data'))
720
720
721 # We need to replace the censored revision's data with the tombstone.
721 # We need to replace the censored revision's data with the tombstone.
722 # But replacing that data will have implications for delta chains that
722 # But replacing that data will have implications for delta chains that
723 # reference it.
723 # reference it.
724 #
724 #
725 # While "better," more complex strategies are possible, we do something
725 # While "better," more complex strategies are possible, we do something
726 # simple: we find delta chain children of the censored revision and we
726 # simple: we find delta chain children of the censored revision and we
727 # replace those incremental deltas with fulltexts of their corresponding
727 # replace those incremental deltas with fulltexts of their corresponding
728 # revision. Then we delete the now-unreferenced delta and original
728 # revision. Then we delete the now-unreferenced delta and original
729 # revision and insert a replacement.
729 # revision and insert a replacement.
730
730
731 # Find the delta to be censored.
731 # Find the delta to be censored.
732 censoreddeltaid = self._db.execute(
732 censoreddeltaid = self._db.execute(
733 r'SELECT deltaid FROM fileindex WHERE id=?',
733 r'SELECT deltaid FROM fileindex WHERE id=?',
734 (self._revisions[censornode].rid,)).fetchone()[0]
734 (self._revisions[censornode].rid,)).fetchone()[0]
735
735
736 # Find all its delta chain children.
736 # Find all its delta chain children.
737 # TODO once we support storing deltas for !files, we'll need to look
737 # TODO once we support storing deltas for !files, we'll need to look
738 # for those delta chains too.
738 # for those delta chains too.
739 rows = list(self._db.execute(
739 rows = list(self._db.execute(
740 r'SELECT id, pathid, node FROM fileindex '
740 r'SELECT id, pathid, node FROM fileindex '
741 r'WHERE deltabaseid=? OR deltaid=?',
741 r'WHERE deltabaseid=? OR deltaid=?',
742 (censoreddeltaid, censoreddeltaid)))
742 (censoreddeltaid, censoreddeltaid)))
743
743
744 for row in rows:
744 for row in rows:
745 rid, pathid, node = row
745 rid, pathid, node = row
746
746
747 fulltext = resolvedeltachain(self._db, pathid, node, {}, {-1: None},
747 fulltext = resolvedeltachain(self._db, pathid, node, {}, {-1: None},
748 zstddctx=self._dctx)
748 zstddctx=self._dctx)
749
749
750 deltahash = hashlib.sha1(fulltext).digest()
750 deltahash = hashlib.sha1(fulltext).digest()
751
751
752 if self._compengine == 'zstd':
752 if self._compengine == 'zstd':
753 deltablob = self._cctx.compress(fulltext)
753 deltablob = self._cctx.compress(fulltext)
754 compression = COMPRESSION_ZSTD
754 compression = COMPRESSION_ZSTD
755 elif self._compengine == 'zlib':
755 elif self._compengine == 'zlib':
756 deltablob = zlib.compress(fulltext)
756 deltablob = zlib.compress(fulltext)
757 compression = COMPRESSION_ZLIB
757 compression = COMPRESSION_ZLIB
758 elif self._compengine == 'none':
758 elif self._compengine == 'none':
759 deltablob = fulltext
759 deltablob = fulltext
760 compression = COMPRESSION_NONE
760 compression = COMPRESSION_NONE
761 else:
761 else:
762 raise error.ProgrammingError('unhandled compression engine: %s'
762 raise error.ProgrammingError('unhandled compression engine: %s'
763 % self._compengine)
763 % self._compengine)
764
764
765 if len(deltablob) >= len(fulltext):
765 if len(deltablob) >= len(fulltext):
766 deltablob = fulltext
766 deltablob = fulltext
767 compression = COMPRESSION_NONE
767 compression = COMPRESSION_NONE
768
768
769 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
769 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
770
770
771 self._db.execute(
771 self._db.execute(
772 r'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
772 r'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
773 r'WHERE id=?', (deltaid, rid))
773 r'WHERE id=?', (deltaid, rid))
774
774
775 # Now create the tombstone delta and replace the delta on the censored
775 # Now create the tombstone delta and replace the delta on the censored
776 # node.
776 # node.
777 deltahash = hashlib.sha1(tombstone).digest()
777 deltahash = hashlib.sha1(tombstone).digest()
778 tombstonedeltaid = insertdelta(self._db, COMPRESSION_NONE,
778 tombstonedeltaid = insertdelta(self._db, COMPRESSION_NONE,
779 deltahash, tombstone)
779 deltahash, tombstone)
780
780
781 flags = self._revisions[censornode].flags
781 flags = self._revisions[censornode].flags
782 flags |= FLAG_CENSORED
782 flags |= FLAG_CENSORED
783
783
784 self._db.execute(
784 self._db.execute(
785 r'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
785 r'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
786 r'WHERE pathid=? AND node=?',
786 r'WHERE pathid=? AND node=?',
787 (flags, tombstonedeltaid, self._pathid, censornode))
787 (flags, tombstonedeltaid, self._pathid, censornode))
788
788
789 self._db.execute(
789 self._db.execute(
790 r'DELETE FROM delta WHERE id=?', (censoreddeltaid,))
790 r'DELETE FROM delta WHERE id=?', (censoreddeltaid,))
791
791
792 self._refreshindex()
792 self._refreshindex()
793 self._revisioncache.clear()
793 self._revisioncache.clear()
794
794
795 def getstrippoint(self, minlink):
795 def getstrippoint(self, minlink):
796 return storageutil.resolvestripinfo(minlink, len(self) - 1,
796 return storageutil.resolvestripinfo(minlink, len(self) - 1,
797 [self.rev(n) for n in self.heads()],
797 [self.rev(n) for n in self.heads()],
798 self.linkrev,
798 self.linkrev,
799 self.parentrevs)
799 self.parentrevs)
800
800
801 def strip(self, minlink, transaction):
801 def strip(self, minlink, transaction):
802 if not len(self):
802 if not len(self):
803 return
803 return
804
804
805 rev, _ignored = self.getstrippoint(minlink)
805 rev, _ignored = self.getstrippoint(minlink)
806
806
807 if rev == len(self):
807 if rev == len(self):
808 return
808 return
809
809
810 for rev in self.revs(rev):
810 for rev in self.revs(rev):
811 self._db.execute(
811 self._db.execute(
812 r'DELETE FROM fileindex WHERE pathid=? AND node=?',
812 r'DELETE FROM fileindex WHERE pathid=? AND node=?',
813 (self._pathid, self.node(rev)))
813 (self._pathid, self.node(rev)))
814
814
815 # TODO how should we garbage collect data in delta table?
815 # TODO how should we garbage collect data in delta table?
816
816
817 self._refreshindex()
817 self._refreshindex()
818
818
819 # End of ifilemutation interface.
819 # End of ifilemutation interface.
820
820
821 # Start of ifilestorage interface.
821 # Start of ifilestorage interface.
822
822
823 def files(self):
823 def files(self):
824 return []
824 return []
825
825
826 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
826 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
827 revisionscount=False, trackedsize=False,
827 revisionscount=False, trackedsize=False,
828 storedsize=False):
828 storedsize=False):
829 d = {}
829 d = {}
830
830
831 if exclusivefiles:
831 if exclusivefiles:
832 d['exclusivefiles'] = []
832 d['exclusivefiles'] = []
833
833
834 if sharedfiles:
834 if sharedfiles:
835 # TODO list sqlite file(s) here.
835 # TODO list sqlite file(s) here.
836 d['sharedfiles'] = []
836 d['sharedfiles'] = []
837
837
838 if revisionscount:
838 if revisionscount:
839 d['revisionscount'] = len(self)
839 d['revisionscount'] = len(self)
840
840
841 if trackedsize:
841 if trackedsize:
842 d['trackedsize'] = sum(len(self.revision(node))
842 d['trackedsize'] = sum(len(self.revision(node))
843 for node in self._nodetorev)
843 for node in self._nodetorev)
844
844
845 if storedsize:
845 if storedsize:
846 # TODO implement this?
846 # TODO implement this?
847 d['storedsize'] = None
847 d['storedsize'] = None
848
848
849 return d
849 return d
850
850
851 def verifyintegrity(self, state):
851 def verifyintegrity(self, state):
852 state['skipread'] = set()
852 state['skipread'] = set()
853
853
854 for rev in self:
854 for rev in self:
855 node = self.node(rev)
855 node = self.node(rev)
856
856
857 try:
857 try:
858 self.revision(node)
858 self.revision(node)
859 except Exception as e:
859 except Exception as e:
860 yield sqliteproblem(
860 yield sqliteproblem(
861 error=_('unpacking %s: %s') % (short(node), e),
861 error=_('unpacking %s: %s') % (short(node), e),
862 node=node)
862 node=node)
863
863
864 state['skipread'].add(node)
864 state['skipread'].add(node)
865
865
866 # End of ifilestorage interface.
866 # End of ifilestorage interface.
867
867
868 def _checkhash(self, fulltext, node, p1=None, p2=None):
868 def _checkhash(self, fulltext, node, p1=None, p2=None):
869 if p1 is None and p2 is None:
869 if p1 is None and p2 is None:
870 p1, p2 = self.parents(node)
870 p1, p2 = self.parents(node)
871
871
872 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
872 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
873 return
873 return
874
874
875 try:
875 try:
876 del self._revisioncache[node]
876 del self._revisioncache[node]
877 except KeyError:
877 except KeyError:
878 pass
878 pass
879
879
880 if storageutil.iscensoredtext(fulltext):
880 if storageutil.iscensoredtext(fulltext):
881 raise error.CensoredNodeError(self._path, node, fulltext)
881 raise error.CensoredNodeError(self._path, node, fulltext)
882
882
883 raise SQLiteStoreError(_('integrity check failed on %s') %
883 raise SQLiteStoreError(_('integrity check failed on %s') %
884 self._path)
884 self._path)
885
885
886 def _addrawrevision(self, node, revisiondata, transaction, linkrev,
886 def _addrawrevision(self, node, revisiondata, transaction, linkrev,
887 p1, p2, storedelta=None, flags=0):
887 p1, p2, storedelta=None, flags=0):
888 if self._pathid is None:
888 if self._pathid is None:
889 res = self._db.execute(
889 res = self._db.execute(
890 r'INSERT INTO filepath (path) VALUES (?)', (self._path,))
890 r'INSERT INTO filepath (path) VALUES (?)', (self._path,))
891 self._pathid = res.lastrowid
891 self._pathid = res.lastrowid
892
892
893 # For simplicity, always store a delta against p1.
893 # For simplicity, always store a delta against p1.
894 # TODO we need a lot more logic here to make behavior reasonable.
894 # TODO we need a lot more logic here to make behavior reasonable.
895
895
896 if storedelta:
896 if storedelta:
897 deltabase, delta = storedelta
897 deltabase, delta = storedelta
898
898
899 if isinstance(deltabase, int):
899 if isinstance(deltabase, int):
900 deltabase = self.node(deltabase)
900 deltabase = self.node(deltabase)
901
901
902 else:
902 else:
903 assert revisiondata is not None
903 assert revisiondata is not None
904 deltabase = p1
904 deltabase = p1
905
905
906 if deltabase == nullid:
906 if deltabase == nullid:
907 delta = revisiondata
907 delta = revisiondata
908 else:
908 else:
909 delta = mdiff.textdiff(self.revision(self.rev(deltabase)),
909 delta = mdiff.textdiff(self.revision(self.rev(deltabase)),
910 revisiondata)
910 revisiondata)
911
911
912 # File index stores a pointer to its delta and the parent delta.
912 # File index stores a pointer to its delta and the parent delta.
913 # The parent delta is stored via a pointer to the fileindex PK.
913 # The parent delta is stored via a pointer to the fileindex PK.
914 if deltabase == nullid:
914 if deltabase == nullid:
915 baseid = None
915 baseid = None
916 else:
916 else:
917 baseid = self._revisions[deltabase].rid
917 baseid = self._revisions[deltabase].rid
918
918
919 # Deltas are stored with a hash of their content. This allows
919 # Deltas are stored with a hash of their content. This allows
920 # us to de-duplicate. The table is configured to ignore conflicts
920 # us to de-duplicate. The table is configured to ignore conflicts
921 # and it is faster to just insert and silently noop than to look
921 # and it is faster to just insert and silently noop than to look
922 # first.
922 # first.
923 deltahash = hashlib.sha1(delta).digest()
923 deltahash = hashlib.sha1(delta).digest()
924
924
925 if self._compengine == 'zstd':
925 if self._compengine == 'zstd':
926 deltablob = self._cctx.compress(delta)
926 deltablob = self._cctx.compress(delta)
927 compression = COMPRESSION_ZSTD
927 compression = COMPRESSION_ZSTD
928 elif self._compengine == 'zlib':
928 elif self._compengine == 'zlib':
929 deltablob = zlib.compress(delta)
929 deltablob = zlib.compress(delta)
930 compression = COMPRESSION_ZLIB
930 compression = COMPRESSION_ZLIB
931 elif self._compengine == 'none':
931 elif self._compengine == 'none':
932 deltablob = delta
932 deltablob = delta
933 compression = COMPRESSION_NONE
933 compression = COMPRESSION_NONE
934 else:
934 else:
935 raise error.ProgrammingError('unhandled compression engine: %s' %
935 raise error.ProgrammingError('unhandled compression engine: %s' %
936 self._compengine)
936 self._compengine)
937
937
938 # Don't store compressed data if it isn't practical.
938 # Don't store compressed data if it isn't practical.
939 if len(deltablob) >= len(delta):
939 if len(deltablob) >= len(delta):
940 deltablob = delta
940 deltablob = delta
941 compression = COMPRESSION_NONE
941 compression = COMPRESSION_NONE
942
942
943 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
943 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
944
944
945 rev = len(self)
945 rev = len(self)
946
946
947 if p1 == nullid:
947 if p1 == nullid:
948 p1rev = nullrev
948 p1rev = nullrev
949 else:
949 else:
950 p1rev = self._nodetorev[p1]
950 p1rev = self._nodetorev[p1]
951
951
952 if p2 == nullid:
952 if p2 == nullid:
953 p2rev = nullrev
953 p2rev = nullrev
954 else:
954 else:
955 p2rev = self._nodetorev[p2]
955 p2rev = self._nodetorev[p2]
956
956
957 rid = self._db.execute(
957 rid = self._db.execute(
958 r'INSERT INTO fileindex ('
958 r'INSERT INTO fileindex ('
959 r' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
959 r' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
960 r' deltaid, deltabaseid) '
960 r' deltaid, deltabaseid) '
961 r' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
961 r' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
962 (self._pathid, rev, node, p1rev, p2rev, linkrev, flags,
962 (self._pathid, rev, node, p1rev, p2rev, linkrev, flags,
963 deltaid, baseid)
963 deltaid, baseid)
964 ).lastrowid
964 ).lastrowid
965
965
966 entry = revisionentry(
966 entry = revisionentry(
967 rid=rid,
967 rid=rid,
968 rev=rev,
968 rev=rev,
969 node=node,
969 node=node,
970 p1rev=p1rev,
970 p1rev=p1rev,
971 p2rev=p2rev,
971 p2rev=p2rev,
972 p1node=p1,
972 p1node=p1,
973 p2node=p2,
973 p2node=p2,
974 linkrev=linkrev,
974 linkrev=linkrev,
975 flags=flags)
975 flags=flags)
976
976
977 self._nodetorev[node] = rev
977 self._nodetorev[node] = rev
978 self._revtonode[rev] = node
978 self._revtonode[rev] = node
979 self._revisions[node] = entry
979 self._revisions[node] = entry
980
980
981 return node
981 return node
982
982
class sqliterepository(localrepo.localrepository):
    def cancopy(self):
        return False

    def transaction(self, *args, **kwargs):
        current = self.currenttransaction()

        tr = super(sqliterepository, self).transaction(*args, **kwargs)

        if current:
            return tr

        self._dbconn.execute(r'BEGIN TRANSACTION')

        def committransaction(_):
            self._dbconn.commit()

        tr.addfinalize('sqlitestore', committransaction)

        return tr

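    # Illustrative flow (added sketch, roughly): only the outermost
    # transaction issues BEGIN TRANSACTION and registers the commit
    # finalizer; nested calls see ``current`` set and reuse it:
    #
    #   with repo.transaction('commit'):      # BEGIN TRANSACTION issued here
    #       repo.transaction('nested')        # returns early, no new BEGIN
    #   # finalizers run on close -> self._dbconn.commit()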
    @property
    def _dbconn(self):
        # SQLite connections can only be used on the thread that created
        # them. In most cases, this "just works." However, hgweb uses
        # multiple threads.
        tid = threading.current_thread().ident

        if self._db:
            if self._db[0] == tid:
                return self._db[1]

        db = makedb(self.svfs.join('db.sqlite'))
        self._db = (tid, db)

        return db

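# Note (added commentary, with a hypothetical illustration): because of this
# thread affinity, only the most recent thread's handle is cached, so each
# new thread that touches the property opens a fresh connection and replaces
# the cached (thread id, connection) pair:
#
#   def worker():
#       repo._dbconn   # opens a new handle, clobbers the cached pair
#   threading.Thread(target=worker).start()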
def makedb(path):
    """Construct a database handle for a database at path."""

    db = sqlite3.connect(path)
    db.text_factory = bytes

    res = db.execute(r'PRAGMA user_version').fetchone()[0]

    # New database.
    if res == 0:
        for statement in CREATE_SCHEMA:
            db.execute(statement)

        db.commit()

    elif res == CURRENT_SCHEMA_VERSION:
        pass

    else:
        raise error.Abort(_('sqlite database has unrecognized version'))

    db.execute(r'PRAGMA journal_mode=WAL')

    return db

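# Example usage (added sketch; the path is hypothetical). A fresh database
# reports ``PRAGMA user_version`` of 0, gets the schema applied, and is
# switched to write-ahead logging:
#
#   db = makedb('/tmp/example.sqlite')
#   db.execute(r'PRAGMA journal_mode').fetchone()[0]  # -> b'wal'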
def featuresetup(ui, supported):
    supported.add(REQUIREMENT)

    if zstd:
        supported.add(REQUIREMENT_ZSTD)

    supported.add(REQUIREMENT_ZLIB)
    supported.add(REQUIREMENT_NONE)
    supported.add(REQUIREMENT_SHALLOW_FILES)
    supported.add(repository.NARROW_REQUIREMENT)

def newreporequirements(orig, ui, createopts):
    if createopts['backend'] != 'sqlite':
        return orig(ui, createopts)

    # This restriction can be lifted once we have more confidence.
    if 'sharedrepo' in createopts:
        raise error.Abort(_('shared repositories not supported with SQLite '
                            'store'))

    # This filtering is out of an abundance of caution: we want to ensure
    # we honor creation options and we do that by annotating exactly the
    # creation options we recognize.
    known = {
        'narrowfiles',
        'backend',
        'shallowfilestore',
    }

    unsupported = set(createopts) - known
    if unsupported:
        raise error.Abort(_('SQLite store does not support repo creation '
                            'option: %s') % ', '.join(sorted(unsupported)))

    # Since we're a hybrid store that still relies on revlogs, we fall back
    # to using the revlogv1 backend's storage requirements then adding our
    # own requirement.
    createopts['backend'] = 'revlogv1'
    requirements = orig(ui, createopts)
    requirements.add(REQUIREMENT)

    compression = ui.config('storage', 'sqlite.compression')

    if compression == 'zstd' and not zstd:
        raise error.Abort(_('storage.sqlite.compression set to "zstd" but '
                            'zstandard compression not available to this '
                            'Mercurial install'))

    if compression == 'zstd':
        requirements.add(REQUIREMENT_ZSTD)
    elif compression == 'zlib':
        requirements.add(REQUIREMENT_ZLIB)
    elif compression == 'none':
        requirements.add(REQUIREMENT_NONE)
    else:
        raise error.Abort(_('unknown compression engine defined in '
                            'storage.sqlite.compression: %s') % compression)

    if createopts.get('shallowfilestore'):
        requirements.add(REQUIREMENT_SHALLOW_FILES)

    return requirements

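# Example configuration (added illustration): the compression engine that
# gets recorded in a newly created repository's requirements is chosen via
# an hgrc snippet like:
#
#   [storage]
#   sqlite.compression = zlib   # one of 'zstd', 'zlib', 'none'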
@interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
class sqlitefilestorage(object):
    """Repository file storage backed by SQLite."""
    def file(self, path):
        if path[0] == b'/':
            path = path[1:]

        if REQUIREMENT_ZSTD in self.requirements:
            compression = 'zstd'
        elif REQUIREMENT_ZLIB in self.requirements:
            compression = 'zlib'
        elif REQUIREMENT_NONE in self.requirements:
            compression = 'none'
        else:
            raise error.Abort(_('unable to determine what compression engine '
                                'to use for SQLite storage'))

        return sqlitefilestore(self._dbconn, path, compression)

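# Example (added sketch; the path is hypothetical): file storage is obtained
# through the standard repository API, which routes through the method above:
#
#   fl = repo.file(b'/path/to/file.txt')  # leading slash stripped
#   # -> sqlitefilestore bound to the repo's connection and compression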
def makefilestorage(orig, requirements, features, **kwargs):
    """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
    if REQUIREMENT in requirements:
        if REQUIREMENT_SHALLOW_FILES in requirements:
            features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)

        return sqlitefilestorage
    else:
        return orig(requirements=requirements, features=features, **kwargs)

def makemain(orig, ui, requirements, **kwargs):
    if REQUIREMENT in requirements:
        if REQUIREMENT_ZSTD in requirements and not zstd:
            raise error.Abort(_('repository uses zstandard compression, which '
                                'is not available to this Mercurial install'))

        return sqliterepository

    return orig(requirements=requirements, **kwargs)

def verifierinit(orig, self, *args, **kwargs):
    orig(self, *args, **kwargs)

    # We don't care that files in the store don't align with what is
    # advertised. So suppress these warnings.
    self.warnorphanstorefiles = False

def extsetup(ui):
    localrepo.featuresetupfuncs.add(featuresetup)
    extensions.wrapfunction(localrepo, 'newreporequirements',
                            newreporequirements)
    extensions.wrapfunction(localrepo, 'makefilestorage',
                            makefilestorage)
    extensions.wrapfunction(localrepo, 'makemain',
                            makemain)
    extensions.wrapfunction(verify.verifier, '__init__',
                            verifierinit)

def reposetup(ui, repo):
    if isinstance(repo, sqliterepository):
        repo._db = None

    # TODO check for bundlerepository?
@@ -1,2544 +1,2544 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# Blanket usage of all the names to prevent pyflakes complaints.
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# Max size of a revlog with inline data.
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
      - (read)  f(self, rawtext) -> text, bool
      - (write) f(self, text) -> rawtext, bool
      - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, the "rawtext" returned by "write" should be used
      to generate the hash. Usually, "write" and "read" return different
      booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

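# Example registration (added sketch; the processor functions and helper
# names are hypothetical, and must satisfy the invariants documented above):
#
#   def _extread(rl, rawtext):
#       return resolvestored(rawtext), False   # hypothetical helper
#   def _extwrite(rl, text):
#       return storeexternally(text), False    # hypothetical helper
#   def _extraw(rl, rawtext):
#       return False
#   addflagprocessor(REVIDX_EXTSTORED, (_extread, _extwrite, _extraw))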
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

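# Worked example (added illustration): the low 16 bits of the packed value
# carry the flags and the remaining bits the offset, so packing round-trips:
#
#   packed = offset_type(1024, REVIDX_ISCENSORED)
#   assert getoffset(packed) == 1024
#   assert gettype(packed) == REVIDX_ISCENSORED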
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

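# Sanity check of the v0 layout (added illustration): four 4-byte big-endian
# longs plus three 20-byte nodeids give a 76-byte index record:
#
#   assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes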
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

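# Sanity check of the "ng" layout (added illustration): the offset and flags
# share one 8-byte field, six 4-byte ints follow, and the 20-byte nodeid is
# padded to 32 bytes, for a 64-byte record:
#
#   assert indexformatng.size == 8 + 6 * 4 + 20 + 12  # 64 bytes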
283 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
283 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
284 # signed integer)
284 # signed integer)
285 _maxentrysize = 0x7fffffff
285 _maxentrysize = 0x7fffffff
286
286
287 class revlogio(object):
287 class revlogio(object):
288 def __init__(self):
288 def __init__(self):
289 self.size = indexformatng.size
289 self.size = indexformatng.size
290
290
291 def parseindex(self, data, inline):
291 def parseindex(self, data, inline):
292 # call the C implementation to parse the index data
292 # call the C implementation to parse the index data
293 index, cache = parsers.parse_index2(data, inline)
293 index, cache = parsers.parse_index2(data, inline)
294 return index, getattr(index, 'nodemap', None), cache
294 return index, getattr(index, 'nodemap', None), cache
295
295
296 def packentry(self, entry, node, version, rev):
296 def packentry(self, entry, node, version, rev):
297 p = indexformatng_pack(*entry)
297 p = indexformatng_pack(*entry)
298 if rev == 0:
298 if rev == 0:
299 p = versionformat_pack(version) + p[4:]
299 p = versionformat_pack(version) + p[4:]
300 return p
300 return p
301
301
302 class revlog(object):
302 class revlog(object):
303 """
303 """
304 the underlying revision storage object
304 the underlying revision storage object
305
305
306 A revlog consists of two parts, an index and the revision data.
306 A revlog consists of two parts, an index and the revision data.
307
307
308 The index is a file with a fixed record size containing
308 The index is a file with a fixed record size containing
309 information on each revision, including its nodeid (hash), the
309 information on each revision, including its nodeid (hash), the
310 nodeids of its parents, the position and offset of its data within
310 nodeids of its parents, the position and offset of its data within
311 the data file, and the revision it's based on. Finally, each entry
311 the data file, and the revision it's based on. Finally, each entry
312 contains a linkrev entry that can serve as a pointer to external
312 contains a linkrev entry that can serve as a pointer to external
313 data.
313 data.
314
314
315 The revision data itself is a linear collection of data chunks.
315 The revision data itself is a linear collection of data chunks.
316 Each chunk represents a revision and is usually represented as a
316 Each chunk represents a revision and is usually represented as a
317 delta against the previous chunk. To bound lookup time, runs of
317 delta against the previous chunk. To bound lookup time, runs of
318 deltas are limited to about 2 times the length of the original
318 deltas are limited to about 2 times the length of the original
319 version data. This makes retrieval of a version proportional to
319 version data. This makes retrieval of a version proportional to
320 its size, or O(1) relative to the number of revisions.
320 its size, or O(1) relative to the number of revisions.
321
321
322 Both pieces of the revlog are written to in an append-only
322 Both pieces of the revlog are written to in an append-only
323 fashion, which means we never need to rewrite a file to insert or
323 fashion, which means we never need to rewrite a file to insert or
324 remove data, and can use some simple techniques to avoid the need
324 remove data, and can use some simple techniques to avoid the need
325 for locking while reading.
325 for locking while reading.
326
326
327 If checkambig, indexfile is opened with checkambig=True at
327 If checkambig, indexfile is opened with checkambig=True at
328 writing, to avoid file stat ambiguity.
328 writing, to avoid file stat ambiguity.
329
329
330 If mmaplargeindex is True, and an mmapindexthreshold is set, the
330 If mmaplargeindex is True, and an mmapindexthreshold is set, the
331 index will be mmapped rather than read if it is larger than the
331 index will be mmapped rather than read if it is larger than the
332 configured threshold.
332 configured threshold.
333
333
334 If censorable is True, the revlog can have censored revisions.
334 If censorable is True, the revlog can have censored revisions.
335 """
335 """
336 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
336 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
337 mmaplargeindex=False, censorable=False):
337 mmaplargeindex=False, censorable=False):
338 """
338 """
339 create a revlog object
339 create a revlog object
340
340
341 opener is a function that abstracts the file opening operation
341 opener is a function that abstracts the file opening operation
342 and can be used to implement COW semantics or the like.
342 and can be used to implement COW semantics or the like.
343 """
343 """
344 self.indexfile = indexfile
344 self.indexfile = indexfile
345 self.datafile = datafile or (indexfile[:-2] + ".d")
345 self.datafile = datafile or (indexfile[:-2] + ".d")
346 self.opener = opener
346 self.opener = opener
347 # When True, indexfile is opened with checkambig=True at writing, to
347 # When True, indexfile is opened with checkambig=True at writing, to
348 # avoid file stat ambiguity.
348 # avoid file stat ambiguity.
349 self._checkambig = checkambig
349 self._checkambig = checkambig
350 self._censorable = censorable
350 self._censorable = censorable
351 # 3-tuple of (node, rev, text) for a raw revision.
351 # 3-tuple of (node, rev, text) for a raw revision.
352 self._revisioncache = None
352 self._revisioncache = None
353 # Maps rev to chain base rev.
353 # Maps rev to chain base rev.
354 self._chainbasecache = util.lrucachedict(100)
354 self._chainbasecache = util.lrucachedict(100)
355 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
355 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
356 self._chunkcache = (0, '')
356 self._chunkcache = (0, '')
357 # How much data to read and cache into the raw revlog data cache.
357 # How much data to read and cache into the raw revlog data cache.
358 self._chunkcachesize = 65536
358 self._chunkcachesize = 65536
359 self._maxchainlen = None
359 self._maxchainlen = None
360 self._deltabothparents = True
360 self._deltabothparents = True
361 self.index = []
361 self.index = []
362 # Mapping of partial identifiers to full nodes.
362 # Mapping of partial identifiers to full nodes.
363 self._pcache = {}
363 self._pcache = {}
364 # Mapping of revision integer to full node.
364 # Mapping of revision integer to full node.
365 self._nodecache = {nullid: nullrev}
365 self._nodecache = {nullid: nullrev}
366 self._nodepos = None
366 self._nodepos = None
367 self._compengine = 'zlib'
367 self._compengine = 'zlib'
368 self._maxdeltachainspan = -1
368 self._maxdeltachainspan = -1
369 self._withsparseread = False
369 self._withsparseread = False
370 self._sparserevlog = False
370 self._sparserevlog = False
371 self._srdensitythreshold = 0.50
371 self._srdensitythreshold = 0.50
372 self._srmingapsize = 262144
372 self._srmingapsize = 262144
373
373
374 # Make copy of flag processors so each revlog instance can support
374 # Make copy of flag processors so each revlog instance can support
375 # custom flags.
375 # custom flags.
376 self._flagprocessors = dict(_flagprocessors)
376 self._flagprocessors = dict(_flagprocessors)
377
377
378 mmapindexthreshold = None
378 mmapindexthreshold = None
379 v = REVLOG_DEFAULT_VERSION
379 v = REVLOG_DEFAULT_VERSION
380 opts = getattr(opener, 'options', None)
380 opts = getattr(opener, 'options', None)
381 if opts is not None:
381 if opts is not None:
382 if 'revlogv2' in opts:
382 if 'revlogv2' in opts:
383 # version 2 revlogs always use generaldelta.
383 # version 2 revlogs always use generaldelta.
384 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
384 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
385 elif 'revlogv1' in opts:
385 elif 'revlogv1' in opts:
386 if 'generaldelta' in opts:
386 if 'generaldelta' in opts:
387 v |= FLAG_GENERALDELTA
387 v |= FLAG_GENERALDELTA
388 else:
388 else:
389 v = 0
389 v = 0
390 if 'chunkcachesize' in opts:
390 if 'chunkcachesize' in opts:
391 self._chunkcachesize = opts['chunkcachesize']
391 self._chunkcachesize = opts['chunkcachesize']
392 if 'maxchainlen' in opts:
392 if 'maxchainlen' in opts:
393 self._maxchainlen = opts['maxchainlen']
393 self._maxchainlen = opts['maxchainlen']
394 if 'deltabothparents' in opts:
394 if 'deltabothparents' in opts:
395 self._deltabothparents = opts['deltabothparents']
395 self._deltabothparents = opts['deltabothparents']
396 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
396 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
397 if 'compengine' in opts:
397 if 'compengine' in opts:
398 self._compengine = opts['compengine']
398 self._compengine = opts['compengine']
399 if 'maxdeltachainspan' in opts:
399 if 'maxdeltachainspan' in opts:
400 self._maxdeltachainspan = opts['maxdeltachainspan']
400 self._maxdeltachainspan = opts['maxdeltachainspan']
401 if mmaplargeindex and 'mmapindexthreshold' in opts:
401 if mmaplargeindex and 'mmapindexthreshold' in opts:
402 mmapindexthreshold = opts['mmapindexthreshold']
402 mmapindexthreshold = opts['mmapindexthreshold']
403 self._sparserevlog = bool(opts.get('sparse-revlog', False))
403 self._sparserevlog = bool(opts.get('sparse-revlog', False))
404 withsparseread = bool(opts.get('with-sparse-read', False))
404 withsparseread = bool(opts.get('with-sparse-read', False))
405 # sparse-revlog forces sparse-read
405 # sparse-revlog forces sparse-read
406 self._withsparseread = self._sparserevlog or withsparseread
406 self._withsparseread = self._sparserevlog or withsparseread
407 if 'sparse-read-density-threshold' in opts:
407 if 'sparse-read-density-threshold' in opts:
408 self._srdensitythreshold = opts['sparse-read-density-threshold']
408 self._srdensitythreshold = opts['sparse-read-density-threshold']
409 if 'sparse-read-min-gap-size' in opts:
409 if 'sparse-read-min-gap-size' in opts:
410 self._srmingapsize = opts['sparse-read-min-gap-size']
410 self._srmingapsize = opts['sparse-read-min-gap-size']
411 if opts.get('enableellipsis'):
411 if opts.get('enableellipsis'):
412 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
412 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
413
413
414 # revlog v0 doesn't have flag processors
414 # revlog v0 doesn't have flag processors
415 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
415 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
416 _insertflagprocessor(flag, processor, self._flagprocessors)
416 _insertflagprocessor(flag, processor, self._flagprocessors)
417
417
418 if self._chunkcachesize <= 0:
418 if self._chunkcachesize <= 0:
419 raise error.RevlogError(_('revlog chunk cache size %r is not '
419 raise error.RevlogError(_('revlog chunk cache size %r is not '
420 'greater than 0') % self._chunkcachesize)
420 'greater than 0') % self._chunkcachesize)
421 elif self._chunkcachesize & (self._chunkcachesize - 1):
421 elif self._chunkcachesize & (self._chunkcachesize - 1):
422 raise error.RevlogError(_('revlog chunk cache size %r is not a '
422 raise error.RevlogError(_('revlog chunk cache size %r is not a '
423 'power of 2') % self._chunkcachesize)
423 'power of 2') % self._chunkcachesize)
424
424
425 self._loadindex(v, mmapindexthreshold)
425 self._loadindex(v, mmapindexthreshold)
426
426
427 def _loadindex(self, v, mmapindexthreshold):
427 def _loadindex(self, v, mmapindexthreshold):
428 indexdata = ''
428 indexdata = ''
429 self._initempty = True
429 self._initempty = True
430 try:
430 try:
431 with self._indexfp() as f:
431 with self._indexfp() as f:
432 if (mmapindexthreshold is not None and
432 if (mmapindexthreshold is not None and
433 self.opener.fstat(f).st_size >= mmapindexthreshold):
433 self.opener.fstat(f).st_size >= mmapindexthreshold):
434 indexdata = util.buffer(util.mmapread(f))
434 indexdata = util.buffer(util.mmapread(f))
435 else:
435 else:
436 indexdata = f.read()
436 indexdata = f.read()
437 if len(indexdata) > 0:
437 if len(indexdata) > 0:
438 v = versionformat_unpack(indexdata[:4])[0]
438 v = versionformat_unpack(indexdata[:4])[0]
439 self._initempty = False
439 self._initempty = False
440 except IOError as inst:
440 except IOError as inst:
441 if inst.errno != errno.ENOENT:
441 if inst.errno != errno.ENOENT:
442 raise
442 raise
443
443
444 self.version = v
444 self.version = v
445 self._inline = v & FLAG_INLINE_DATA
445 self._inline = v & FLAG_INLINE_DATA
446 self._generaldelta = v & FLAG_GENERALDELTA
446 self._generaldelta = v & FLAG_GENERALDELTA
447 flags = v & ~0xFFFF
447 flags = v & ~0xFFFF
448 fmt = v & 0xFFFF
448 fmt = v & 0xFFFF
449 if fmt == REVLOGV0:
449 if fmt == REVLOGV0:
450 if flags:
450 if flags:
451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 'revlog %s') %
452 'revlog %s') %
453 (flags >> 16, fmt, self.indexfile))
453 (flags >> 16, fmt, self.indexfile))
454 elif fmt == REVLOGV1:
454 elif fmt == REVLOGV1:
455 if flags & ~REVLOGV1_FLAGS:
455 if flags & ~REVLOGV1_FLAGS:
456 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
456 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
457 'revlog %s') %
457 'revlog %s') %
458 (flags >> 16, fmt, self.indexfile))
458 (flags >> 16, fmt, self.indexfile))
459 elif fmt == REVLOGV2:
459 elif fmt == REVLOGV2:
460 if flags & ~REVLOGV2_FLAGS:
460 if flags & ~REVLOGV2_FLAGS:
461 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
461 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
462 'revlog %s') %
462 'revlog %s') %
463 (flags >> 16, fmt, self.indexfile))
463 (flags >> 16, fmt, self.indexfile))
464 else:
464 else:
465 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
465 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
466 (fmt, self.indexfile))
466 (fmt, self.indexfile))
467
467
468 self._storedeltachains = True
468 self._storedeltachains = True
469
469
470 self._io = revlogio()
470 self._io = revlogio()
471 if self.version == REVLOGV0:
471 if self.version == REVLOGV0:
472 self._io = revlogoldio()
472 self._io = revlogoldio()
473 try:
473 try:
474 d = self._io.parseindex(indexdata, self._inline)
474 d = self._io.parseindex(indexdata, self._inline)
475 except (ValueError, IndexError):
475 except (ValueError, IndexError):
476 raise error.RevlogError(_("index %s is corrupted") %
476 raise error.RevlogError(_("index %s is corrupted") %
477 self.indexfile)
477 self.indexfile)
478 self.index, nodemap, self._chunkcache = d
478 self.index, nodemap, self._chunkcache = d
479 if nodemap is not None:
479 if nodemap is not None:
480 self.nodemap = self._nodecache = nodemap
480 self.nodemap = self._nodecache = nodemap
481 if not self._chunkcache:
481 if not self._chunkcache:
482 self._chunkclear()
482 self._chunkclear()
483 # revnum -> (chain-length, sum-delta-length)
483 # revnum -> (chain-length, sum-delta-length)
484 self._chaininfocache = {}
484 self._chaininfocache = {}
485 # revlog header -> revlog compressor
485 # revlog header -> revlog compressor
486 self._decompressors = {}
486 self._decompressors = {}
487
487
488 @util.propertycache
488 @util.propertycache
489 def _compressor(self):
489 def _compressor(self):
490 return util.compengines[self._compengine].revlogcompressor()
490 return util.compengines[self._compengine].revlogcompressor()
491
491
492 def _indexfp(self, mode='r'):
492 def _indexfp(self, mode='r'):
493 """file object for the revlog's index file"""
493 """file object for the revlog's index file"""
494 args = {r'mode': mode}
494 args = {r'mode': mode}
495 if mode != 'r':
495 if mode != 'r':
496 args[r'checkambig'] = self._checkambig
496 args[r'checkambig'] = self._checkambig
497 if mode == 'w':
497 if mode == 'w':
498 args[r'atomictemp'] = True
498 args[r'atomictemp'] = True
499 return self.opener(self.indexfile, **args)
499 return self.opener(self.indexfile, **args)
500
500
501 def _datafp(self, mode='r'):
501 def _datafp(self, mode='r'):
502 """file object for the revlog's data file"""
502 """file object for the revlog's data file"""
503 return self.opener(self.datafile, mode=mode)
503 return self.opener(self.datafile, mode=mode)
504
504
505 @contextlib.contextmanager
505 @contextlib.contextmanager
506 def _datareadfp(self, existingfp=None):
506 def _datareadfp(self, existingfp=None):
507 """file object suitable to read data"""
507 """file object suitable to read data"""
508 if existingfp is not None:
508 if existingfp is not None:
509 yield existingfp
509 yield existingfp
510 else:
510 else:
511 if self._inline:
511 if self._inline:
512 func = self._indexfp
512 func = self._indexfp
513 else:
513 else:
514 func = self._datafp
514 func = self._datafp
515 with func() as fp:
515 with func() as fp:
516 yield fp
516 yield fp
517
517
518 def tip(self):
518 def tip(self):
519 return self.node(len(self.index) - 1)
519 return self.node(len(self.index) - 1)
520 def __contains__(self, rev):
520 def __contains__(self, rev):
521 return 0 <= rev < len(self)
521 return 0 <= rev < len(self)
522 def __len__(self):
522 def __len__(self):
523 return len(self.index)
523 return len(self.index)
524 def __iter__(self):
524 def __iter__(self):
525 return iter(pycompat.xrange(len(self)))
525 return iter(pycompat.xrange(len(self)))
526 def revs(self, start=0, stop=None):
526 def revs(self, start=0, stop=None):
527 """iterate over all rev in this revlog (from start to stop)"""
527 """iterate over all rev in this revlog (from start to stop)"""
528 return storageutil.iterrevs(len(self), start=start, stop=stop)
528 return storageutil.iterrevs(len(self), start=start, stop=stop)
529
529
530 @util.propertycache
530 @util.propertycache
531 def nodemap(self):
531 def nodemap(self):
532 if self.index:
532 if self.index:
533 # populate mapping down to the initial node
533 # populate mapping down to the initial node
534 node0 = self.index[0][7] # get around changelog filtering
534 node0 = self.index[0][7] # get around changelog filtering
535 self.rev(node0)
535 self.rev(node0)
536 return self._nodecache
536 return self._nodecache
537
537
538 def hasnode(self, node):
538 def hasnode(self, node):
539 try:
539 try:
540 self.rev(node)
540 self.rev(node)
541 return True
541 return True
542 except KeyError:
542 except KeyError:
543 return False
543 return False
544
544
545 def candelta(self, baserev, rev):
545 def candelta(self, baserev, rev):
546 """whether two revisions (baserev, rev) can be delta-ed or not"""
546 """whether two revisions (baserev, rev) can be delta-ed or not"""
547 # Disable delta if either rev requires a content-changing flag
547 # Disable delta if either rev requires a content-changing flag
548 # processor (ex. LFS). This is because such flag processor can alter
548 # processor (ex. LFS). This is because such flag processor can alter
549 # the rawtext content that the delta will be based on, and two clients
549 # the rawtext content that the delta will be based on, and two clients
550 # could have a same revlog node with different flags (i.e. different
550 # could have a same revlog node with different flags (i.e. different
551 # rawtext contents) and the delta could be incompatible.
551 # rawtext contents) and the delta could be incompatible.
552 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
552 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
553 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
553 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
554 return False
554 return False
555 return True
555 return True
556
556
557 def clearcaches(self):
557 def clearcaches(self):
558 self._revisioncache = None
558 self._revisioncache = None
559 self._chainbasecache.clear()
559 self._chainbasecache.clear()
560 self._chunkcache = (0, '')
560 self._chunkcache = (0, '')
561 self._pcache = {}
561 self._pcache = {}
562
562
563 try:
563 try:
564 self._nodecache.clearcaches()
564 self._nodecache.clearcaches()
565 except AttributeError:
565 except AttributeError:
566 self._nodecache = {nullid: nullrev}
566 self._nodecache = {nullid: nullrev}
567 self._nodepos = None
567 self._nodepos = None
568
568
569 def rev(self, node):
569 def rev(self, node):
570 try:
570 try:
571 return self._nodecache[node]
571 return self._nodecache[node]
572 except TypeError:
572 except TypeError:
573 raise
573 raise
574 except error.RevlogError:
574 except error.RevlogError:
575 # parsers.c radix tree lookup failed
575 # parsers.c radix tree lookup failed
576 if node == wdirid or node in wdirfilenodeids:
576 if node == wdirid or node in wdirfilenodeids:
577 raise error.WdirUnsupported
577 raise error.WdirUnsupported
578 raise error.LookupError(node, self.indexfile, _('no node'))
578 raise error.LookupError(node, self.indexfile, _('no node'))
579 except KeyError:
579 except KeyError:
580 # pure python cache lookup failed
580 # pure python cache lookup failed
581 n = self._nodecache
581 n = self._nodecache
582 i = self.index
582 i = self.index
583 p = self._nodepos
583 p = self._nodepos
584 if p is None:
584 if p is None:
585 p = len(i) - 1
585 p = len(i) - 1
586 else:
586 else:
587 assert p < len(i)
587 assert p < len(i)
588 for r in pycompat.xrange(p, -1, -1):
588 for r in pycompat.xrange(p, -1, -1):
589 v = i[r][7]
589 v = i[r][7]
590 n[v] = r
590 n[v] = r
591 if v == node:
591 if v == node:
592 self._nodepos = r - 1
592 self._nodepos = r - 1
593 return r
593 return r
594 if node == wdirid or node in wdirfilenodeids:
594 if node == wdirid or node in wdirfilenodeids:
595 raise error.WdirUnsupported
595 raise error.WdirUnsupported
596 raise error.LookupError(node, self.indexfile, _('no node'))
596 raise error.LookupError(node, self.indexfile, _('no node'))
597
597
598 # Accessors for index entries.
598 # Accessors for index entries.
599
599
600 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
600 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
601 # are flags.
601 # are flags.
602 def start(self, rev):
602 def start(self, rev):
603 return int(self.index[rev][0] >> 16)
603 return int(self.index[rev][0] >> 16)
604
604
605 def flags(self, rev):
605 def flags(self, rev):
606 return self.index[rev][0] & 0xFFFF
606 return self.index[rev][0] & 0xFFFF
607
607
608 def length(self, rev):
608 def length(self, rev):
609 return self.index[rev][1]
609 return self.index[rev][1]
610
610
611 def rawsize(self, rev):
611 def rawsize(self, rev):
612 """return the length of the uncompressed text for a given revision"""
612 """return the length of the uncompressed text for a given revision"""
613 l = self.index[rev][2]
613 l = self.index[rev][2]
614 if l >= 0:
614 if l >= 0:
615 return l
615 return l
616
616
617 t = self.revision(rev, raw=True)
617 t = self.revision(rev, raw=True)
618 return len(t)
618 return len(t)
619
619
620 def size(self, rev):
620 def size(self, rev):
621 """length of non-raw text (processed by a "read" flag processor)"""
621 """length of non-raw text (processed by a "read" flag processor)"""
622 # fast path: if no "read" flag processor could change the content,
622 # fast path: if no "read" flag processor could change the content,
623 # size is rawsize. note: ELLIPSIS is known to not change the content.
623 # size is rawsize. note: ELLIPSIS is known to not change the content.
624 flags = self.flags(rev)
624 flags = self.flags(rev)
625 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
625 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
626 return self.rawsize(rev)
626 return self.rawsize(rev)
627
627
628 return len(self.revision(rev, raw=False))
628 return len(self.revision(rev, raw=False))
629
629
630 def chainbase(self, rev):
630 def chainbase(self, rev):
631 base = self._chainbasecache.get(rev)
631 base = self._chainbasecache.get(rev)
632 if base is not None:
632 if base is not None:
633 return base
633 return base
634
634
635 index = self.index
635 index = self.index
636 iterrev = rev
636 iterrev = rev
637 base = index[iterrev][3]
637 base = index[iterrev][3]
638 while base != iterrev:
638 while base != iterrev:
639 iterrev = base
639 iterrev = base
640 base = index[iterrev][3]
640 base = index[iterrev][3]
641
641
642 self._chainbasecache[rev] = base
642 self._chainbasecache[rev] = base
643 return base
643 return base
644
644
645 def linkrev(self, rev):
645 def linkrev(self, rev):
646 return self.index[rev][4]
646 return self.index[rev][4]
647
647
648 def parentrevs(self, rev):
648 def parentrevs(self, rev):
649 try:
649 try:
650 entry = self.index[rev]
650 entry = self.index[rev]
651 except IndexError:
651 except IndexError:
652 if rev == wdirrev:
652 if rev == wdirrev:
653 raise error.WdirUnsupported
653 raise error.WdirUnsupported
654 raise
654 raise
655
655
656 return entry[5], entry[6]
656 return entry[5], entry[6]
657
657
658 # fast parentrevs(rev) where rev isn't filtered
658 # fast parentrevs(rev) where rev isn't filtered
659 _uncheckedparentrevs = parentrevs
659 _uncheckedparentrevs = parentrevs
660
660
661 def node(self, rev):
661 def node(self, rev):
662 try:
662 try:
663 return self.index[rev][7]
663 return self.index[rev][7]
664 except IndexError:
664 except IndexError:
665 if rev == wdirrev:
665 if rev == wdirrev:
666 raise error.WdirUnsupported
666 raise error.WdirUnsupported
667 raise
667 raise
668
668
669 # Derived from index values.
669 # Derived from index values.
670
670
671 def end(self, rev):
671 def end(self, rev):
672 return self.start(rev) + self.length(rev)
672 return self.start(rev) + self.length(rev)
673
673
674 def parents(self, node):
674 def parents(self, node):
675 i = self.index
675 i = self.index
676 d = i[self.rev(node)]
676 d = i[self.rev(node)]
677 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
677 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
678
678
679 def chainlen(self, rev):
679 def chainlen(self, rev):
680 return self._chaininfo(rev)[0]
680 return self._chaininfo(rev)[0]
681
681
682 def _chaininfo(self, rev):
682 def _chaininfo(self, rev):
683 chaininfocache = self._chaininfocache
683 chaininfocache = self._chaininfocache
684 if rev in chaininfocache:
684 if rev in chaininfocache:
685 return chaininfocache[rev]
685 return chaininfocache[rev]
686 index = self.index
686 index = self.index
687 generaldelta = self._generaldelta
687 generaldelta = self._generaldelta
688 iterrev = rev
688 iterrev = rev
689 e = index[iterrev]
689 e = index[iterrev]
690 clen = 0
690 clen = 0
691 compresseddeltalen = 0
691 compresseddeltalen = 0
692 while iterrev != e[3]:
692 while iterrev != e[3]:
693 clen += 1
693 clen += 1
694 compresseddeltalen += e[1]
694 compresseddeltalen += e[1]
695 if generaldelta:
695 if generaldelta:
696 iterrev = e[3]
696 iterrev = e[3]
697 else:
697 else:
698 iterrev -= 1
698 iterrev -= 1
699 if iterrev in chaininfocache:
699 if iterrev in chaininfocache:
700 t = chaininfocache[iterrev]
700 t = chaininfocache[iterrev]
701 clen += t[0]
701 clen += t[0]
702 compresseddeltalen += t[1]
702 compresseddeltalen += t[1]
703 break
703 break
704 e = index[iterrev]
704 e = index[iterrev]
705 else:
705 else:
706 # Add text length of base since decompressing that also takes
706 # Add text length of base since decompressing that also takes
707 # work. For cache hits the length is already included.
707 # work. For cache hits the length is already included.
708 compresseddeltalen += e[1]
708 compresseddeltalen += e[1]
709 r = (clen, compresseddeltalen)
709 r = (clen, compresseddeltalen)
710 chaininfocache[rev] = r
710 chaininfocache[rev] = r
711 return r
711 return r
712
712
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

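    # Illustrative behaviour (hypothetical revlog where revision 3 is a
    # full text and revisions 4 and 5 are successive deltas):
    #
    #   self._deltachain(5)            # -> ([3, 4, 5], False)
    #   self._deltachain(5, stoprev=4) # -> ([5], True)
    #
    # The second walk stops *before* including ``stoprev``, letting the
    # caller patch onto an already-reconstructed text for revision 4.
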
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well
        if util.safehasattr(parsers, 'rustlazyancestors'):
            return ancestor.rustlazyancestors(
                self.index, revs,
                stoprev=stoprev, inclusive=inclusive)
        return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
                                      stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

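    # Illustrative use of ``ancestors`` (hypothetical linear history
    # 0 -> 1 -> 2 -> 3); the result is lazy, so membership tests do not
    # force a full walk:
    #
    #   ancs = self.ancestors([3], stoprev=1)
    #   2 in ancs     # -> True
    #   list(ancs)    # -> [2, 1]; revs below stoprev are not generated
    #   3 in ancs     # -> False unless inclusive=True was passed
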
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

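    # Illustrative result (hypothetical DAG 0 -> 1 -> 2 and 1 -> 3, with
    # node(r) standing for self.node(r)):
    #
    #   has, missing = self.findcommonmissing(common=[node(1)],
    #                                         heads=[node(2), node(3)])
    #   # ``has`` lazily contains ::1, i.e. revs 0 and 1 (plus nullrev);
    #   # ``missing`` is [node(2), node(3)], sorted by revision number.
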
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

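    # The three "missing" entry points above differ mainly in currency.
    # Illustrative sketch (hypothetical DAG 0 -> 1 -> 2, node(r) standing
    # for self.node(r)):
    #
    #   self.findmissingrevs(common=[0], heads=[2])          # -> [1, 2]
    #   self.findmissing(common=[node(0)], heads=[node(2)])
    #   # -> [node(1), node(2)]
    #
    # while ``incrementalmissingrevs`` returns a stateful object that can
    # answer the same question for successive batches of heads.
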
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

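    # Illustrative call (hypothetical DAG 0 -> 1 -> 2 and 1 -> 3, with
    # node(r) standing for self.node(r)):
    #
    #   nodes, outroots, outheads = self.nodesbetween(
    #       roots=[node(1)], heads=[node(2), node(3)])
    #   # nodes    == [node(1), node(2), node(3)]   (topologically sorted)
    #   # outroots == [node(1)]
    #   # outheads == [node(2), node(3)]
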
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

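    # Illustrative trace of ``_headrevs`` (hypothetical DAG 0 -> 1 -> 2
    # and 1 -> 3): every rev is first marked as a potential head, then
    # each rev clears the marks of its parents, leaving [2, 3].  The
    # extra slot at index ``count`` harmlessly absorbs the writes made
    # for nullrev (-1) parents.
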
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

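    # The fast paths in ``isancestorrev``, spelled out: nullrev precedes
    # everything, a revision is its own ancestor, and because revision
    # numbers are topologically ordered an ancestor can never carry a
    # higher number than its descendant.  Only the remaining case needs
    # the potentially expensive computation:
    #
    #   a is an ancestor of b  <=>  a in heads(::a and ::b)
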
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

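    # Illustrative slow-path behaviour (hypothetical prefix '1234ab'):
    # the even-length binary prefix '1234' is matched against the index
    # first, then candidates are narrowed with the full hex prefix.
    # All-'f' prefixes are special because they may denote the working
    # directory pseudo-node: an 'ff...' prefix that matches a real node
    # is reported as ambiguous, while one that matches nothing raises
    # WdirUnsupported to identify the working directory.
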
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

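    # Illustrative outcome (hypothetical node whose hex form starts with
    # 'deadbeef' and shares only its first two hex digits with any other
    # node): ``self.shortest(node)`` returns 'dea'.  Prefixes consisting
    # solely of 'f' are lengthened by ``disambiguate`` until they can no
    # longer be confused with the working directory id.
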
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

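    # Worked example of the window arithmetic (hypothetical cachesize of
    # 65536): a request for offset=70000, length=1000 is widened to
    #
    #   realoffset = 70000 & ~65535                     # -> 65536
    #   reallength = ((71000 + 65536) & ~65535) - 65536 # -> 65536
    #
    # so a single aligned 64k read satisfies the request and seeds the
    # cache for neighbouring revisions in both directions.
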
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

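    # Why the inline adjustment above works: in an inline revlog each
    # revision's data is preceded by its index entry, so the entries for
    # revs 0 through r all sit before rev r's data, hence the
    # ``(r + 1) * self._io.size`` shift over the bare offset recorded in
    # the index.  Illustrative numbers (assuming 64-byte index entries):
    # a recorded offset of 100 for rev 2 becomes 100 + 3 * 64 = 292 on
    # disk.
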
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

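    # Sketch of why ``_chunks`` beats repeated ``_chunk`` calls: a single
    # contiguous segment read covers revisions firstrev..lastrev, and each
    # revision's compressed bytes are then sliced out of that one buffer
    # at ``start(rev) - offset`` instead of issuing one read per revision.
    # With sparse-read enabled, ``deltautil.slicechunk`` first splits the
    # revs into densely packed runs so large gaps between them are not
    # read at all.
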
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

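    # Illustrative classification (hypothetical revlog): a revision whose
    # delta parent is nullrev is a full-text snapshot; one that deltas
    # against p1 or p2 is a plain delta; one whose delta base is neither
    # parent is a snapshot exactly when that base is itself a snapshot.
    # So if rev 7's delta base is rev 4, rev 4 is not a parent of 7, and
    # rev 4 is a snapshot, then issnapshot(7) is True and snapshotdepth(7)
    # counts the snapshots below it in the delta chain.
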
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

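    # The reconstruction step above, in miniature (hypothetical chain
    # [3, 4, 5] with the cache holding rev 4's raw text): _deltachain(5,
    # stoprev=4) returns ([5], True), so only rev 5's delta is fetched
    # and patched onto the cached text:
    #
    #   rawtext = mdiff.patches(cached_rawtext, [delta_for_rev_5])
    #
    # With no usable cache, bins[0] is the full text of the chain base and
    # every subsequent delta in ``bins`` is applied in order.
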
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

1612     def _processflags(self, text, flags, operation, raw=False):
1613         """Inspect revision data flags and apply transforms defined by
1614         registered flag processors.
1615
1616         ``text`` - the revision data to process
1617         ``flags`` - the revision flags
1618         ``operation`` - the operation being performed (read or write)
1619         ``raw`` - an optional argument describing if the raw transform should be
1620         applied.
1621
1622         This method processes the flags in the order (or reverse order if
1623         ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1624         flag processors registered for present flags. The order of flags defined
1625         in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1626
1627         Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1628         processed text and ``validatehash`` is a bool indicating whether the
1629         returned text should be checked for hash integrity.
1630
1631         Note: If the ``raw`` argument is set, it has precedence over the
1632         operation and will only update the value of ``validatehash``.
1633         """
1634         # fast path: no flag processors will run
1635         if flags == 0:
1636             return text, True
1637         if operation not in ('read', 'write'):
1638             raise error.ProgrammingError(_("invalid '%s' operation") %
1639                                          operation)
1640         # Check all flags are known.
1641         if flags & ~REVIDX_KNOWN_FLAGS:
1642             raise error.RevlogError(_("incompatible revision flag '%#x'") %
1643                                     (flags & ~REVIDX_KNOWN_FLAGS))
1644         validatehash = True
1645         # Depending on the operation (read or write), the order might be
1646         # reversed due to non-commutative transforms.
1647         orderedflags = REVIDX_FLAGS_ORDER
1648         if operation == 'write':
1649             orderedflags = reversed(orderedflags)
1650
1651         for flag in orderedflags:
1652             # If a flagprocessor has been registered for a known flag, apply the
1653             # related operation transform and update result tuple.
1654             if flag & flags:
1655                 vhash = True
1656
1657                 if flag not in self._flagprocessors:
1658                     message = _("missing processor for flag '%#x'") % (flag)
1659                     raise error.RevlogError(message)
1660
1661                 processor = self._flagprocessors[flag]
1662                 if processor is not None:
1663                     readtransform, writetransform, rawtransform = processor
1664
1665                     if raw:
1666                         vhash = rawtransform(self, text)
1667                     elif operation == 'read':
1668                         text, vhash = readtransform(self, text)
1669                     else: # write operation
1670                         text, vhash = writetransform(self, text)
1671                 validatehash = validatehash and vhash
1672
1673         return text, validatehash
1674
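A sketch of the processor triple unpacked above. The flag bit and the transforms are hypothetical, illustration-only names; real processors are registered through revlog.addflagprocessor (referenced later in this file).

    REVIDX_EXAMPLE = 1 << 12   # hypothetical flag bit, for illustration only

    def exampleread(rl, text):
        # read transform: returns (text for consumers, validatehash)
        return text.replace(b'\r\n', b'\n'), True

    def examplewrite(rl, text):
        # write transform: returns (rawtext to store, validatehash)
        return text.replace(b'\n', b'\r\n'), True

    def exampleraw(rl, text):
        # raw transform: only reports whether the stored bytes may be
        # hash-checked directly
        return True

    example_processor = (exampleread, examplewrite, exampleraw)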
1675     def checkhash(self, text, node, p1=None, p2=None, rev=None):
1676         """Check node hash integrity.
1677
1678         Available as a function so that subclasses can extend hash mismatch
1679         behaviors as needed.
1680         """
1681         try:
1682             if p1 is None and p2 is None:
1683                 p1, p2 = self.parents(node)
1684             if node != self.hash(text, p1, p2):
1685                 # Clear the revision cache on hash failure. The revision cache
1686                 # only stores the raw revision and clearing the cache does have
1687                 # the side-effect that we won't have a cache hit when the raw
1688                 # revision data is accessed. But this case should be rare and
1689                 # it is extra work to teach the cache about the hash
1690                 # verification state.
1691                 if self._revisioncache and self._revisioncache[0] == node:
1692                     self._revisioncache = None
1693
1694                 revornode = rev
1695                 if revornode is None:
1696                     revornode = templatefilters.short(hex(node))
1697                 raise error.RevlogError(_("integrity check failed on %s:%s")
1698                     % (self.indexfile, pycompat.bytestr(revornode)))
1699         except error.RevlogError:
1700             if self._censorable and storageutil.iscensoredtext(text):
1701                 raise error.CensoredNodeError(self.indexfile, node, text)
1702             raise
1703
1704     def _enforceinlinesize(self, tr, fp=None):
1705         """Check if the revlog is too big for inline and convert if so.
1706
1707         This should be called after revisions are added to the revlog. If the
1708         revlog has grown too large to be an inline revlog, it will convert it
1709         to use multiple index and data files.
1710         """
1711         tiprev = len(self) - 1
1712         if (not self._inline or
1713             (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1714             return
1715
1716         trinfo = tr.find(self.indexfile)
1717         if trinfo is None:
1718             raise error.RevlogError(_("%s not found in the transaction")
1719                                     % self.indexfile)
1720
1721         trindex = trinfo[2]
1722         if trindex is not None:
1723             dataoff = self.start(trindex)
1724         else:
1725             # revlog was stripped at start of transaction, use all leftover data
1726             trindex = len(self) - 1
1727             dataoff = self.end(tiprev)
1728
1729         tr.add(self.datafile, dataoff)
1730
1731         if fp:
1732             fp.flush()
1733             fp.close()
1734
1735         with self._datafp('w') as df:
1736             for r in self:
1737                 df.write(self._getsegmentforrevs(r, r)[1])
1738
1739         with self._indexfp('w') as fp:
1740             self.version &= ~FLAG_INLINE_DATA
1741             self._inline = False
1742             io = self._io
1743             for i in self:
1744                 e = io.packentry(self.index[i], self.node, self.version, i)
1745                 fp.write(e)
1746
1747             # the temp file replaces the real index when we exit the context
1748             # manager
1749
1750         tr.replace(self.indexfile, trindex * self._io.size)
1751         self._chunkclear()
1752
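For orientation, a sketch of the two on-disk layouts this method converts between (sizes are schematic; only the arrangement matters):

    inline revlog:  foo.i = [index entry 0][data 0][index entry 1][data 1]...
    split revlog:   foo.i = [index entry 0][index entry 1]...
                    foo.d = [data 0][data 1]...

Once the inline file crosses _maxinline, the data chunks move to the .d file and FLAG_INLINE_DATA is cleared from the version header, as the code above does.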
1753     def _nodeduplicatecallback(self, transaction, node):
1754         """called when trying to add a node already stored.
1755         """
1756
1757     def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1758                     node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1759         """add a revision to the log
1760
1761         text - the revision data to add
1762         transaction - the transaction object used for rollback
1763         link - the linkrev data to add
1764         p1, p2 - the parent nodeids of the revision
1765         cachedelta - an optional precomputed delta
1766         node - nodeid of revision; typically node is not specified, and it is
1767             computed by default as hash(text, p1, p2); however, subclasses might
1768             use a different hashing method (and override checkhash() in that case)
1769         flags - the known flags to set on the revision
1770         deltacomputer - an optional deltacomputer instance shared between
1771             multiple calls
1772         """
1773         if link == nullrev:
1774             raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1775                                     % self.indexfile)
1776
1777         if flags:
1778             node = node or self.hash(text, p1, p2)
1779
1780         rawtext, validatehash = self._processflags(text, flags, 'write')
1781
1782         # If the flag processor modifies the revision data, ignore any provided
1783         # cachedelta.
1784         if rawtext != text:
1785             cachedelta = None
1786
1787         if len(rawtext) > _maxentrysize:
1788             raise error.RevlogError(
1789                 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1790                 % (self.indexfile, len(rawtext)))
1791
1792         node = node or self.hash(rawtext, p1, p2)
1793         if node in self.nodemap:
1794             return node
1795
1796         if validatehash:
1797             self.checkhash(rawtext, node, p1=p1, p2=p2)
1798
1799         return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1800                                    flags, cachedelta=cachedelta,
1801                                    deltacomputer=deltacomputer)
1802
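A minimal usage sketch, assuming a revlog ``rl``, an open transaction ``tr``, a ``linkrev`` and 20-byte parent nodes ``p1node``/``p2node`` already exist (all assumed names):

    node = rl.addrevision(b'new file content\n', tr, linkrev, p1node, p2node)
    # with the default flags (no flag processor ran), the returned node is
    # simply hash(text, p1, p2)
    assert node == rl.hash(b'new file content\n', p1node, p2node)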
1803     def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1804                        cachedelta=None, deltacomputer=None):
1805         """add a raw revision with known flags, node and parents
1806         useful when reusing a revision not stored in this revlog (e.g. received
1807         over the wire, or read from an external bundle).
1808         """
1809         dfh = None
1810         if not self._inline:
1811             dfh = self._datafp("a+")
1812         ifh = self._indexfp("a+")
1813         try:
1814             return self._addrevision(node, rawtext, transaction, link, p1, p2,
1815                                      flags, cachedelta, ifh, dfh,
1816                                      deltacomputer=deltacomputer)
1817         finally:
1818             if dfh:
1819                 dfh.close()
1820             ifh.close()
1821
1822     def compress(self, data):
1823         """Generate a possibly-compressed representation of data."""
1824         if not data:
1825             return '', data
1826
1827         compressed = self._compressor.compress(data)
1828
1829         if compressed:
1830             # The revlog compressor added the header in the returned data.
1831             return '', compressed
1832
1833         if data[0:1] == '\0':
1834             return '', data
1835         return 'u', data
1836
1837     def decompress(self, data):
1838         """Decompress a revlog chunk.
1839
1840         The chunk is expected to begin with a header identifying the
1841         format type so it can be routed to an appropriate decompressor.
1842         """
1843         if not data:
1844             return data
1845
1846         # Revlogs are read much more frequently than they are written and many
1847         # chunks only take microseconds to decompress, so performance is
1848         # important here.
1849         #
1850         # We can make a few assumptions about revlogs:
1851         #
1852         # 1) the majority of chunks will be compressed (as opposed to inline
1853         #    raw data).
1854         # 2) decompressing *any* data will likely be at least 10x slower than
1855         #    returning raw inline data.
1856         # 3) we want to prioritize common and officially supported compression
1857         #    engines
1858         #
1859         # It follows that we want to optimize for "decompress compressed data
1860         # when encoded with common and officially supported compression engines"
1861         # case over "raw data" and "data encoded by less common or non-official
1862         # compression engines." That is why we have the inline lookup first
1863         # followed by the compengines lookup.
1864         #
1865         # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1866         # compressed chunks. And this matters for changelog and manifest reads.
1867         t = data[0:1]
1868
1869         if t == 'x':
1870             try:
1871                 return _zlibdecompress(data)
1872             except zlib.error as e:
1873                 raise error.RevlogError(_('revlog decompress error: %s') %
1874                                         stringutil.forcebytestr(e))
1875         # '\0' is more common than 'u' so it goes first.
1876         elif t == '\0':
1877             return data
1878         elif t == 'u':
1879             return util.buffer(data, 1)
1880
1881         try:
1882             compressor = self._decompressors[t]
1883         except KeyError:
1884             try:
1885                 engine = util.compengines.forrevlogheader(t)
1886                 compressor = engine.revlogcompressor()
1887                 self._decompressors[t] = compressor
1888             except KeyError:
1889                 raise error.RevlogError(_('unknown compression type %r') % t)
1890
1891         return compressor.decompress(data)
1892
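The header convention the two methods above share, restated as a self-contained sketch over plain zlib. This folds revlog's ``('u', data)`` two-tuple into a single buffer for brevity; a zlib stream begins with 'x', an explicit 'u' prefixes stored-uncompressed data, and a leading '\0' needs no marker at all.

    import zlib

    def sketch_compress(data):
        compressed = zlib.compress(data)
        if len(compressed) < len(data):
            return compressed            # zlib streams start with 'x'
        if data[:1] == b'\0':
            return data                  # '\0' chunks pass through verbatim
        return b'u' + data               # explicit "uncompressed" marker

    def sketch_decompress(chunk):
        t = chunk[:1]
        if t == b'x':
            return zlib.decompress(chunk)
        if t == b'\0':
            return chunk
        if t == b'u':
            return chunk[1:]
        raise ValueError('unknown compression type %r' % t)

    assert (sketch_decompress(sketch_compress(b'some revision text' * 10))
            == b'some revision text' * 10)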
1893     def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1894                      cachedelta, ifh, dfh, alwayscache=False,
1895                      deltacomputer=None):
1896         """internal function to add revisions to the log
1897
1898         see addrevision for argument descriptions.
1899
1900         note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1901
1902         if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1903         be used.
1904
1905         invariants:
1906         - rawtext is optional (can be None); if not set, cachedelta must be set.
1907           if both are set, they must correspond to each other.
1908         """
1909         if node == nullid:
1910             raise error.RevlogError(_("%s: attempt to add null revision") %
1911                                     self.indexfile)
1912         if node == wdirid or node in wdirfilenodeids:
1913             raise error.RevlogError(_("%s: attempt to add wdir revision") %
1914                                     self.indexfile)
1915
1916         if self._inline:
1917             fh = ifh
1918         else:
1919             fh = dfh
1920
1921         btext = [rawtext]
1922
1923         curr = len(self)
1924         prev = curr - 1
1925         offset = self.end(prev)
1926         p1r, p2r = self.rev(p1), self.rev(p2)
1927
1928         # full versions are inserted when the needed deltas
1929         # become comparable to the uncompressed text
1930         if rawtext is None:
1931             # need the rawtext size, before it is changed by flag processors,
1932             # which is the non-raw size. use revlog explicitly to avoid filelog's
1933             # extra logic that might remove metadata size.
1934             textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1935                                         cachedelta[1])
1936         else:
1937             textlen = len(rawtext)
1938
1939         if deltacomputer is None:
1940             deltacomputer = deltautil.deltacomputer(self)
1941
1942         revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1943
1944         deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1945
1946         e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1947              deltainfo.base, link, p1r, p2r, node)
1948         self.index.append(e)
1949         self.nodemap[node] = curr
1950
1951         # Reset the pure node cache start lookup offset to account for new
1952         # revision.
1953         if self._nodepos is not None:
1954             self._nodepos = curr
1955
1956         entry = self._io.packentry(e, self.node, self.version, curr)
1957         self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1958                          link, offset)
1959
1960         rawtext = btext[0]
1961
1962         if alwayscache and rawtext is None:
1963             rawtext = deltacomputer.buildtext(revinfo, fh)
1964
1965         if type(rawtext) == bytes: # only accept immutable objects
1966             self._revisioncache = (node, curr, rawtext)
1967         self._chainbasecache[curr] = deltainfo.chainbase
1968         return node
1969
1970     def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1971         # Files opened in a+ mode have inconsistent behavior on various
1972         # platforms. Windows requires that a file positioning call be made
1973         # when the file handle transitions between reads and writes. See
1974         # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1975         # platforms, Python or the platform itself can be buggy. Some versions
1976         # of Solaris have been observed to not append at the end of the file
1977         # if the file was seeked to a position before the end. See issue4943 for more.
1978         #
1979         # We work around this issue by inserting a seek() before writing.
1980         # Note: This is likely not necessary on Python 3.
1981         ifh.seek(0, os.SEEK_END)
1982         if dfh:
1983             dfh.seek(0, os.SEEK_END)
1984
1985         curr = len(self) - 1
1986         if not self._inline:
1987             transaction.add(self.datafile, offset)
1988             transaction.add(self.indexfile, curr * len(entry))
1989             if data[0]:
1990                 dfh.write(data[0])
1991             dfh.write(data[1])
1992             ifh.write(entry)
1993         else:
1994             offset += curr * self._io.size
1995             transaction.add(self.indexfile, offset, curr)
1996             ifh.write(entry)
1997             ifh.write(data[0])
1998             ifh.write(data[1])
1999             self._enforceinlinesize(transaction, ifh)
2000
2001     def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2002         """
2003         add a delta group
2004
2005         given a set of deltas, add them to the revision log. the
2006         first delta is against its parent, which should be in our
2007         log; the rest are against the previous delta.
2008
2009         If ``addrevisioncb`` is defined, it will be called with arguments of
2010         this revlog and the node that was added.
2011         """
2012
2013         nodes = []
2014
2015         r = len(self)
2016         end = 0
2017         if r:
2018             end = self.end(r - 1)
2019         ifh = self._indexfp("a+")
2020         isize = r * self._io.size
2021         if self._inline:
2022             transaction.add(self.indexfile, end + isize, r)
2023             dfh = None
2024         else:
2025             transaction.add(self.indexfile, isize, r)
2026             transaction.add(self.datafile, end)
2027             dfh = self._datafp("a+")
2028         def flush():
2029             if dfh:
2030                 dfh.flush()
2031             ifh.flush()
2032         try:
2033             deltacomputer = deltautil.deltacomputer(self)
2034             # loop through our set of deltas
2035             for data in deltas:
2036                 node, p1, p2, linknode, deltabase, delta, flags = data
2037                 link = linkmapper(linknode)
2038                 flags = flags or REVIDX_DEFAULT_FLAGS
2039
2040                 nodes.append(node)
2041
2042                 if node in self.nodemap:
2043                     self._nodeduplicatecallback(transaction, node)
2044                     # this can happen if two branches make the same change
2045                     continue
2046
2047                 for p in (p1, p2):
2048                     if p not in self.nodemap:
2049                         raise error.LookupError(p, self.indexfile,
2050                                                 _('unknown parent'))
2051
2052                 if deltabase not in self.nodemap:
2053                     raise error.LookupError(deltabase, self.indexfile,
2054                                             _('unknown delta base'))
2055
2056                 baserev = self.rev(deltabase)
2057
2058                 if baserev != nullrev and self.iscensored(baserev):
2059                     # if base is censored, delta must be full replacement in a
2060                     # single patch operation
2061                     hlen = struct.calcsize(">lll")
2062                     oldlen = self.rawsize(baserev)
2063                     newlen = len(delta) - hlen
2064                     if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2065                         raise error.CensoredBaseError(self.indexfile,
2066                                                       self.node(baserev))
2067
2068                 if not flags and self._peek_iscensored(baserev, delta, flush):
2069                     flags |= REVIDX_ISCENSORED
2070
2071                 # We assume consumers of addrevisioncb will want to retrieve
2072                 # the added revision, which will require a call to
2073                 # revision(). revision() will fast path if there is a cache
2074                 # hit. So, we tell _addrevision() to always cache in this case.
2075                 # We're only using addgroup() in the context of changegroup
2076                 # generation so the revision data can always be handled as raw
2077                 # by the flagprocessor.
2078                 self._addrevision(node, None, transaction, link,
2079                                   p1, p2, flags, (baserev, delta),
2080                                   ifh, dfh,
2081                                   alwayscache=bool(addrevisioncb),
2082                                   deltacomputer=deltacomputer)
2083
2084                 if addrevisioncb:
2085                     addrevisioncb(self, node)
2086
2087                 if not dfh and not self._inline:
2088                     # addrevision switched from inline to conventional
2089                     # reopen the index
2090                     ifh.close()
2091                     dfh = self._datafp("a+")
2092                     ifh = self._indexfp("a+")
2093         finally:
2094             if dfh:
2095                 dfh.close()
2096             ifh.close()
2097
2098         return nodes
2099
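For reference, the shape of one entry of the ``deltas`` iterable, as unpacked at the top of the loop above (all values below are placeholders):

    nullid = b'\0' * 20

    delta_entry = (
        b'\xaa' * 20,          # node being added
        b'\xbb' * 20, nullid,  # p1, p2
        b'\xcc' * 20,          # linknode, mapped to a linkrev by linkmapper
        b'\xbb' * 20,          # deltabase: node the delta applies against
        b'<binary delta>',     # delta
        0,                     # flags; 0 falls back to REVIDX_DEFAULT_FLAGS
    )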
2100     def iscensored(self, rev):
2101         """Check if a file revision is censored."""
2102         if not self._censorable:
2103             return False
2104
2105         return self.flags(rev) & REVIDX_ISCENSORED
2106
2107     def _peek_iscensored(self, baserev, delta, flush):
2108         """Quickly check if a delta produces a censored revision."""
2109         if not self._censorable:
2110             return False
2111
2112         return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2113
2114     def getstrippoint(self, minlink):
2115         """find the minimum rev that must be stripped to strip the linkrev
2116
2117         Returns a tuple containing the minimum rev and a set of all revs that
2118         have linkrevs that will be broken by this strip.
2119         """
2120         return storageutil.resolvestripinfo(minlink, len(self) - 1,
2121                                             self.headrevs(),
2122                                             self.linkrev, self.parentrevs)
2123
2124     def strip(self, minlink, transaction):
2125         """truncate the revlog on the first revision with a linkrev >= minlink
2126
2127         This function is called when we're stripping revision minlink and
2128         its descendants from the repository.
2129
2130         We have to remove all revisions with linkrev >= minlink, because
2131         the equivalent changelog revisions will be renumbered after the
2132         strip.
2133
2134         So we truncate the revlog on the first of these revisions, and
2135         trust that the caller has saved the revisions that shouldn't be
2136         removed and that it'll re-add them after this truncation.
2137         """
2138         if len(self) == 0:
2139             return
2140
2141         rev, _ = self.getstrippoint(minlink)
2142         if rev == len(self):
2143             return
2144
2145         # first truncate the files on disk
2146         end = self.start(rev)
2147         if not self._inline:
2148             transaction.add(self.datafile, end)
2149             end = rev * self._io.size
2150         else:
2151             end += rev * self._io.size
2152
2153         transaction.add(self.indexfile, end)
2154
2155         # then reset internal state in memory to forget those revisions
2156         self._revisioncache = None
2157         self._chaininfocache = {}
2158         self._chunkclear()
2159         for x in pycompat.xrange(rev, len(self)):
2160             del self.nodemap[self.node(x)]
2161
2162         del self.index[rev:-1]
2163         self._nodepos = None
2164
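A sketch of the caller's contract described in the docstrings above, with a revlog ``rl`` and an open transaction ``tr`` assumed:

    rev, broken = rl.getstrippoint(5)
    # before truncating, the caller must save every revision >= rev that it
    # wants to keep (plus the revs in ``broken``, whose linkrevs will be
    # renumbered), e.g. in a bundle, and re-add them afterwards
    rl.strip(5, tr)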
2165     def checksize(self):
2166         expected = 0
2167         if len(self):
2168             expected = max(0, self.end(len(self) - 1))
2169
2170         try:
2171             with self._datafp() as f:
2172                 f.seek(0, 2)
2173                 actual = f.tell()
2174             dd = actual - expected
2175         except IOError as inst:
2176             if inst.errno != errno.ENOENT:
2177                 raise
2178             dd = 0
2179
2180         try:
2181             f = self.opener(self.indexfile)
2182             f.seek(0, 2)
2183             actual = f.tell()
2184             f.close()
2185             s = self._io.size
2186             i = max(0, actual // s)
2187             di = actual - (i * s)
2188             if self._inline:
2189                 databytes = 0
2190                 for r in self:
2191                     databytes += max(0, self.length(r))
2192                 dd = 0
2193                 di = actual - len(self) * s - databytes
2194         except IOError as inst:
2195             if inst.errno != errno.ENOENT:
2196                 raise
2197             di = 0
2198
2199         return (dd, di)
2200
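How a caller might read the ``(dd, di)`` pair, mirroring the messages verifyintegrity() emits further down (``rl`` is an assumed revlog instance):

    dd, di = rl.checksize()
    if dd:
        print('data file length off by %d bytes' % dd)
    if di:
        print('index file carries %d unexpected trailing bytes' % di)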
2201     def files(self):
2202         res = [self.indexfile]
2203         if not self._inline:
2204             res.append(self.datafile)
2205         return res
2206
2207     def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2208                       assumehaveparentrevisions=False, deltaprevious=False):
2209 -       if nodesorder not in ('nodes', 'storage', None):
2209 +       if nodesorder not in ('nodes', 'storage', 'linear', None):
2210             raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2211                                          nodesorder)
2212
2213         if nodesorder is None and not self._generaldelta:
2214             nodesorder = 'storage'
2215
2216         return storageutil.emitrevisions(
2217             self, nodes, nodesorder, revlogrevisiondelta,
2218             deltaparentfn=self.deltaparent,
2219             candeltafn=self.candelta,
2220             rawsizefn=self.rawsize,
2221             revdifffn=self.revdiff,
2222             flagsfn=self.flags,
2223             sendfulltext=not self._storedeltachains,
2224             revisiondata=revisiondata,
2225             assumehaveparentrevisions=assumehaveparentrevisions,
2226             deltaprevious=deltaprevious)
2227
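A usage sketch of the sort modes, including the 'linear' value this changeset starts accepting (``rl`` and ``nodes`` are assumed to exist; changegroup is the producer of the explicit linear order):

    # nodesorder=None      let the backend decide ('storage' when the
    #                      revlog is not generaldelta, per the code above)
    # nodesorder='storage' revlog/storage order
    # nodesorder='nodes'   the order of the ``nodes`` argument
    # nodesorder='linear'  an explicit linear sorting, new in this change
    for delta in rl.emitrevisions(nodes, nodesorder='linear',
                                  revisiondata=True):
        pass  # each item is a revlogrevisiondelta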
2228     DELTAREUSEALWAYS = 'always'
2229     DELTAREUSESAMEREVS = 'samerevs'
2230     DELTAREUSENEVER = 'never'
2231
2232     DELTAREUSEFULLADD = 'fulladd'
2233
2234     DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2235
2236     def clone(self, tr, destrevlog, addrevisioncb=None,
2237               deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2238         """Copy this revlog to another, possibly with format changes.
2239
2240         The destination revlog will contain the same revisions and nodes.
2241         However, it may not be bit-for-bit identical due to e.g. delta encoding
2242         differences.
2243
2244         The ``deltareuse`` argument controls how deltas from the existing revlog
2245         are preserved in the destination revlog. The argument can have the
2246         following values:
2247
2248         DELTAREUSEALWAYS
2249           Deltas will always be reused (if possible), even if the destination
2250           revlog would not select the same revisions for the delta. This is the
2251           fastest mode of operation.
2252         DELTAREUSESAMEREVS
2253           Deltas will be reused if the destination revlog would pick the same
2254           revisions for the delta. This mode strikes a balance between speed
2255           and optimization.
2256         DELTAREUSENEVER
2257           Deltas will never be reused. This is the slowest mode of execution.
2258           This mode can be used to recompute deltas (e.g. if the diff/delta
2259           algorithm changes).
2260
2261         Delta computation can be slow, so the choice of delta reuse policy can
2262         significantly affect run time.
2263
2264         The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2265         two extremes. Deltas will be reused if they are appropriate. But if the
2266         delta could choose a better revision, it will do so. This means if you
2267         are converting a non-generaldelta revlog to a generaldelta revlog,
2268         deltas will be recomputed if the delta's parent isn't a parent of the
2269         revision.
2270
2271         In addition to the delta policy, the ``deltabothparents`` argument
2272         controls whether to compute deltas against both parents for merges.
2273         When it is None, the destination revlog's current setting is kept.
2274         """
2275         if deltareuse not in self.DELTAREUSEALL:
2276             raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2277
2278         if len(destrevlog):
2279             raise ValueError(_('destination revlog is not empty'))
2280
2281         if getattr(self, 'filteredrevs', None):
2282             raise ValueError(_('source revlog has filtered revisions'))
2283         if getattr(destrevlog, 'filteredrevs', None):
2284             raise ValueError(_('destination revlog has filtered revisions'))
2285
2286         # lazydeltabase controls whether to reuse a cached delta, if possible.
2287         oldlazydeltabase = destrevlog._lazydeltabase
2288         oldamd = destrevlog._deltabothparents
2289
2290         try:
2291             if deltareuse == self.DELTAREUSEALWAYS:
2292                 destrevlog._lazydeltabase = True
2293             elif deltareuse == self.DELTAREUSESAMEREVS:
2294                 destrevlog._lazydeltabase = False
2295
2296             destrevlog._deltabothparents = deltabothparents or oldamd
2297
2298             populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2299                                                 self.DELTAREUSESAMEREVS)
2300
2301             deltacomputer = deltautil.deltacomputer(destrevlog)
2302             index = self.index
2303             for rev in self:
2304                 entry = index[rev]
2305
2306                 # Some classes override linkrev to take filtered revs into
2307                 # account. Use raw entry from index.
2308                 flags = entry[0] & 0xffff
2309                 linkrev = entry[4]
2310                 p1 = index[entry[5]][7]
2311                 p2 = index[entry[6]][7]
2312                 node = entry[7]
2313
2314                 # (Possibly) reuse the delta from the revlog if allowed and
2315                 # the revlog chunk is a delta.
2316                 cachedelta = None
2317                 rawtext = None
2318                 if populatecachedelta:
2319                     dp = self.deltaparent(rev)
2320                     if dp != nullrev:
2321                         cachedelta = (dp, bytes(self._chunk(rev)))
2322
2323                 if not cachedelta:
2324                     rawtext = self.revision(rev, raw=True)
2325
2326
2327                 if deltareuse == self.DELTAREUSEFULLADD:
2328                     destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2329                                            cachedelta=cachedelta,
2330                                            node=node, flags=flags,
2331                                            deltacomputer=deltacomputer)
2332                 else:
2333                     ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2334                                             checkambig=False)
2335                     dfh = None
2336                     if not destrevlog._inline:
2337                         dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2338                     try:
2339                         destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2340                                                 p2, flags, cachedelta, ifh, dfh,
2341                                                 deltacomputer=deltacomputer)
2342                     finally:
2343                         if dfh:
2344                             dfh.close()
2345                         ifh.close()
2346
2347                 if addrevisioncb:
2348                     addrevisioncb(self, rev, node)
2349         finally:
2350             destrevlog._lazydeltabase = oldlazydeltabase
2351             destrevlog._deltabothparents = oldamd
2352
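A usage sketch: forcing every delta to be recomputed while copying, e.g. across a delta algorithm change as the docstring suggests (``src``, ``dst`` and an open transaction ``tr`` are assumed to exist):

    src.clone(tr, dst,
              deltareuse=src.DELTAREUSENEVER,
              deltabothparents=True)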
2353     def censorrevision(self, tr, censornode, tombstone=b''):
2354         if (self.version & 0xFFFF) == REVLOGV0:
2355             raise error.RevlogError(_('cannot censor with version %d revlogs') %
2356                                     self.version)
2357
2358         censorrev = self.rev(censornode)
2359         tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2360
2361         if len(tombstone) > self.rawsize(censorrev):
2362             raise error.Abort(_('censor tombstone must be no longer than '
2363                                 'censored data'))
2364
2365         # Rewriting the revlog in place is hard. Our strategy for censoring is
2366         # to create a new revlog, copy all revisions to it, then replace the
2367         # revlogs on transaction close.
2368
2369         newindexfile = self.indexfile + b'.tmpcensored'
2370         newdatafile = self.datafile + b'.tmpcensored'
2371
2372         # This is a bit dangerous. We could easily have a mismatch of state.
2373         newrl = revlog(self.opener, newindexfile, newdatafile,
2374                        censorable=True)
2375         newrl.version = self.version
2376         newrl._generaldelta = self._generaldelta
2377         newrl._io = self._io
2378
2379         for rev in self.revs():
2380             node = self.node(rev)
2381             p1, p2 = self.parents(node)
2382
2383             if rev == censorrev:
2384                 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2385                                      p1, p2, censornode, REVIDX_ISCENSORED)
2386
2387                 if newrl.deltaparent(rev) != nullrev:
2388                     raise error.Abort(_('censored revision stored as delta; '
2389                                         'cannot censor'),
2390                                       hint=_('censoring of revlogs is not '
2391                                              'fully implemented; please report '
2392                                              'this bug'))
2393                 continue
2394
2395             if self.iscensored(rev):
2396                 if self.deltaparent(rev) != nullrev:
2397                     raise error.Abort(_('cannot censor due to censored '
2398                                         'revision having delta stored'))
2399                 rawtext = self._chunk(rev)
2400             else:
2401                 rawtext = self.revision(rev, raw=True)
2402
2403             newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2404                                  self.flags(rev))
2405
2406         tr.addbackup(self.indexfile, location='store')
2407         if not self._inline:
2408             tr.addbackup(self.datafile, location='store')
2409
2410         self.opener.rename(newrl.indexfile, self.indexfile)
2411         if not self._inline:
2412             self.opener.rename(newrl.datafile, self.datafile)
2413
2414         self.clearcaches()
2415         self._loadindex(self.version, None)
2416
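What the packed tombstone above looks like on disk: a sketch of storageutil.packmeta's framing, where key/value metadata sits between b'\x01\n' markers ahead of the (here empty) text, following the filelog metadata convention.

    def sketch_packmeta(meta, text):
        metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in sorted(meta))
        return b''.join((b'\x01\n', metatext, b'\x01\n', text))

    assert (sketch_packmeta({b'censored': b'gone'}, b'')
            == b'\x01\ncensored: gone\n\x01\n')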
2417 def verifyintegrity(self, state):
2417 def verifyintegrity(self, state):
2418 """Verifies the integrity of the revlog.
2418 """Verifies the integrity of the revlog.
2419
2419
2420 Yields ``revlogproblem`` instances describing problems that are
2420 Yields ``revlogproblem`` instances describing problems that are
2421 found.
2421 found.
2422 """
2422 """
2423 dd, di = self.checksize()
2423 dd, di = self.checksize()
2424 if dd:
2424 if dd:
2425 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2425 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2426 if di:
2426 if di:
2427 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2427 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2428
2428
2429 version = self.version & 0xFFFF
2429 version = self.version & 0xFFFF
2430
2430
2431 # The verifier tells us what version revlog we should be.
2431 # The verifier tells us what version revlog we should be.
2432 if version != state['expectedversion']:
2432 if version != state['expectedversion']:
2433 yield revlogproblem(
2433 yield revlogproblem(
2434 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2434 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2435 (self.indexfile, version, state['expectedversion']))
2435 (self.indexfile, version, state['expectedversion']))
2436
2436
2437 state['skipread'] = set()
2437 state['skipread'] = set()
2438
2438
2439 for rev in self:
2439 for rev in self:
2440 node = self.node(rev)
2440 node = self.node(rev)
2441
2441
            # Verify contents. There are 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks that need to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

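            # Illustrative sketch of the "meta" case above (assumed values,
            # not executed here): for a text that itself begins with the
            # marker, filelog storage prepends an empty metadata block:
            #
            #   rawtext = b'\x01\n\x01\n' + text
            #   L2 = len(rawtext)         # what the length check compares
            #   len(read()) == L2 - LM    # LM == 4 for the empty header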
            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

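                # Cross-check the rawsize recorded in the index (L1) against
                # the length of the raw text actually stored (L2); per the
                # table above, these must match in all cases.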
                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
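    # Hypothetical usage sketch (the instance name ``rl`` is assumed, not
    # part of this module): a storage consumer might query aggregate
    # figures like so:
    #
    #   info = rl.storageinfo(revisionscount=True, storedsize=True)
    #   print('%d revisions, %d bytes on disk'
    #         % (info['revisionscount'], info['storedsize']))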