revlog: subclass the new `repository.iverifyproblem` Protocol class...
Matt Harbison
r53365:4ef6dbc2 default
@@ -1,1355 +1,1358
1 # sqlitestore.py - Storage backend that uses SQLite
1 # sqlitestore.py - Storage backend that uses SQLite
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """store repository data in SQLite (EXPERIMENTAL)
8 """store repository data in SQLite (EXPERIMENTAL)
9
9
10 The sqlitestore extension enables the storage of repository data in SQLite.
10 The sqlitestore extension enables the storage of repository data in SQLite.
11
11
12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
13 GUARANTEES. This means that repositories created with this extension may
13 GUARANTEES. This means that repositories created with this extension may
14 only be usable with the exact version of this extension/Mercurial that was
14 only be usable with the exact version of this extension/Mercurial that was
15 used. The extension attempts to enforce this in order to prevent repository
15 used. The extension attempts to enforce this in order to prevent repository
16 corruption.
16 corruption.
17
17
18 In addition, several features are not yet supported or have known bugs:
18 In addition, several features are not yet supported or have known bugs:
19
19
20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
21 data is not yet stored in SQLite.
21 data is not yet stored in SQLite.
22 * Transactions are not robust. If the process is aborted at the right time
22 * Transactions are not robust. If the process is aborted at the right time
23 during transaction close/rollback, the repository could be in an inconsistent
23 during transaction close/rollback, the repository could be in an inconsistent
24 state. This problem will diminish once all repository data is tracked by
24 state. This problem will diminish once all repository data is tracked by
25 SQLite.
25 SQLite.
26 * Bundle repositories do not work (the ability to use e.g.
26 * Bundle repositories do not work (the ability to use e.g.
27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
28 existing repository).
28 existing repository).
29 * Various other features don't work.
29 * Various other features don't work.
30
30
31 This extension should work for basic clone/pull, update, and commit workflows.
31 This extension should work for basic clone/pull, update, and commit workflows.
32 Some history rewriting operations may fail due to lack of support for bundle
32 Some history rewriting operations may fail due to lack of support for bundle
33 repositories.
33 repositories.
34
34
35 To use, activate the extension and set the ``storage.new-repo-backend`` config
35 To use, activate the extension and set the ``storage.new-repo-backend`` config
36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
37 """
37 """
38
38
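The docstring's setup amounts to two hgrc settings. As a minimal sketch (the extension and option names come from the documentation and test invocation in this file; where the hgrc lives is up to you), enabling SQLite storage for newly created repositories looks like:

    [extensions]
    sqlitestore =

    [storage]
    new-repo-backend = sqlite

As the docstring notes, the option only affects repositories created after it is set.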
39 # To run the test suite with repos using SQLite by default, execute the
39 # To run the test suite with repos using SQLite by default, execute the
40 # following:
40 # following:
41 #
41 #
42 # HGREPOFEATURES="sqlitestore" run-tests.py \
42 # HGREPOFEATURES="sqlitestore" run-tests.py \
43 # --extra-config-opt extensions.sqlitestore= \
43 # --extra-config-opt extensions.sqlitestore= \
44 # --extra-config-opt storage.new-repo-backend=sqlite
44 # --extra-config-opt storage.new-repo-backend=sqlite
45
45
46 from __future__ import annotations
46 from __future__ import annotations
47
47
48 import sqlite3
48 import sqlite3
49 import struct
49 import struct
50 import threading
50 import threading
51 import typing
51 import typing
52 import zlib
52 import zlib
53
53
54 from typing import (
55 Optional,
56 )
57
54 from mercurial.i18n import _
58 from mercurial.i18n import _
55 from mercurial.node import (
59 from mercurial.node import (
56 nullrev,
60 nullrev,
57 sha1nodeconstants,
61 sha1nodeconstants,
58 short,
62 short,
59 )
63 )
60 from mercurial.thirdparty import attr
64 from mercurial.thirdparty import attr
61
65
62 # Force pytype to use the non-vendored package
66 # Force pytype to use the non-vendored package
63 if typing.TYPE_CHECKING:
67 if typing.TYPE_CHECKING:
64 # noinspection PyPackageRequirements
68 # noinspection PyPackageRequirements
65 import attr
69 import attr
66
70
67 from mercurial import (
71 from mercurial import (
68 ancestor,
72 ancestor,
69 dagop,
73 dagop,
70 encoding,
74 encoding,
71 error,
75 error,
72 extensions,
76 extensions,
73 localrepo,
77 localrepo,
74 mdiff,
78 mdiff,
75 pycompat,
79 pycompat,
76 registrar,
80 registrar,
77 requirements,
81 requirements,
78 util,
82 util,
79 verify,
83 verify,
80 )
84 )
81 from mercurial.interfaces import (
85 from mercurial.interfaces import (
82 repository,
86 repository,
83 util as interfaceutil,
87 util as interfaceutil,
84 )
88 )
85 from mercurial.utils import (
89 from mercurial.utils import (
86 hashutil,
90 hashutil,
87 storageutil,
91 storageutil,
88 )
92 )
89
93
90 try:
94 try:
91 from mercurial import zstd # pytype: disable=import-error
95 from mercurial import zstd # pytype: disable=import-error
92
96
93 zstd.__version__
97 zstd.__version__
94 except ImportError:
98 except ImportError:
95 zstd = None
99 zstd = None
96
100
97 configtable = {}
101 configtable = {}
98 configitem = registrar.configitem(configtable)
102 configitem = registrar.configitem(configtable)
99
103
100 # experimental config: storage.sqlite.compression
104 # experimental config: storage.sqlite.compression
101 configitem(
105 configitem(
102 b'storage',
106 b'storage',
103 b'sqlite.compression',
107 b'sqlite.compression',
104 default=b'zstd' if zstd else b'zlib',
108 default=b'zstd' if zstd else b'zlib',
105 experimental=True,
109 experimental=True,
106 )
110 )
107
111
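The experimental compression option registered above can likewise be pinned in hgrc; a hedged sketch, with the value being one of the engines the store code in this file handles (zstd, zlib, or none), and zstd only usable when the bundled module imports successfully:

    [storage]
    sqlite.compression = zlib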
108 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
112 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
109 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
113 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
110 # be specifying the version(s) of Mercurial they are tested with, or
114 # be specifying the version(s) of Mercurial they are tested with, or
111 # leave the attribute unspecified.
115 # leave the attribute unspecified.
112 testedwith = b'ships-with-hg-core'
116 testedwith = b'ships-with-hg-core'
113
117
114 REQUIREMENT = b'exp-sqlite-001'
118 REQUIREMENT = b'exp-sqlite-001'
115 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
119 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
116 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
120 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
117 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
121 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
118 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
122 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
119
123
120 CURRENT_SCHEMA_VERSION = 1
124 CURRENT_SCHEMA_VERSION = 1
121
125
122 COMPRESSION_NONE = 1
126 COMPRESSION_NONE = 1
123 COMPRESSION_ZSTD = 2
127 COMPRESSION_ZSTD = 2
124 COMPRESSION_ZLIB = 3
128 COMPRESSION_ZLIB = 3
125
129
126 FLAG_CENSORED = 1
130 FLAG_CENSORED = 1
127 FLAG_MISSING_P1 = 2
131 FLAG_MISSING_P1 = 2
128 FLAG_MISSING_P2 = 4
132 FLAG_MISSING_P2 = 4
129
133
130 CREATE_SCHEMA = [
134 CREATE_SCHEMA = [
131 # Deltas are stored as content-indexed blobs.
135 # Deltas are stored as content-indexed blobs.
132 # compression column holds COMPRESSION_* constant for how the
136 # compression column holds COMPRESSION_* constant for how the
133 # delta is encoded.
137 # delta is encoded.
134 'CREATE TABLE delta ('
138 'CREATE TABLE delta ('
135 ' id INTEGER PRIMARY KEY, '
139 ' id INTEGER PRIMARY KEY, '
136 ' compression INTEGER NOT NULL, '
140 ' compression INTEGER NOT NULL, '
137 ' hash BLOB UNIQUE ON CONFLICT ABORT, '
141 ' hash BLOB UNIQUE ON CONFLICT ABORT, '
138 ' delta BLOB NOT NULL '
142 ' delta BLOB NOT NULL '
139 ')',
143 ')',
140 # Tracked paths are denormalized to integers to avoid redundant
144 # Tracked paths are denormalized to integers to avoid redundant
141 # storage of the path name.
145 # storage of the path name.
142 'CREATE TABLE filepath ('
146 'CREATE TABLE filepath ('
143 ' id INTEGER PRIMARY KEY, '
147 ' id INTEGER PRIMARY KEY, '
144 ' path BLOB NOT NULL '
148 ' path BLOB NOT NULL '
145 ')',
149 ')',
146 'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
150 'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
147 # We have a single table for all file revision data.
151 # We have a single table for all file revision data.
148 # Each file revision is uniquely described by a (path, rev) and
152 # Each file revision is uniquely described by a (path, rev) and
149 # (path, node).
153 # (path, node).
150 #
154 #
151 # Revision data is stored as a pointer to the delta producing this
155 # Revision data is stored as a pointer to the delta producing this
152 # revision and the file revision whose delta should be applied before
156 # revision and the file revision whose delta should be applied before
153 # that one. One can reconstruct the delta chain by recursively following
157 # that one. One can reconstruct the delta chain by recursively following
154 # the delta base revision pointers until one encounters NULL.
158 # the delta base revision pointers until one encounters NULL.
155 #
159 #
156 # flags column holds bitwise integer flags controlling storage options.
160 # flags column holds bitwise integer flags controlling storage options.
157 # These flags are defined by the FLAG_* constants.
161 # These flags are defined by the FLAG_* constants.
158 'CREATE TABLE fileindex ('
162 'CREATE TABLE fileindex ('
159 ' id INTEGER PRIMARY KEY, '
163 ' id INTEGER PRIMARY KEY, '
160 ' pathid INTEGER REFERENCES filepath(id), '
164 ' pathid INTEGER REFERENCES filepath(id), '
161 ' revnum INTEGER NOT NULL, '
165 ' revnum INTEGER NOT NULL, '
162 ' p1rev INTEGER NOT NULL, '
166 ' p1rev INTEGER NOT NULL, '
163 ' p2rev INTEGER NOT NULL, '
167 ' p2rev INTEGER NOT NULL, '
164 ' linkrev INTEGER NOT NULL, '
168 ' linkrev INTEGER NOT NULL, '
165 ' flags INTEGER NOT NULL, '
169 ' flags INTEGER NOT NULL, '
166 ' deltaid INTEGER REFERENCES delta(id), '
170 ' deltaid INTEGER REFERENCES delta(id), '
167 ' deltabaseid INTEGER REFERENCES fileindex(id), '
171 ' deltabaseid INTEGER REFERENCES fileindex(id), '
168 ' node BLOB NOT NULL '
172 ' node BLOB NOT NULL '
169 ')',
173 ')',
170 'CREATE UNIQUE INDEX fileindex_pathrevnum '
174 'CREATE UNIQUE INDEX fileindex_pathrevnum '
171 ' ON fileindex (pathid, revnum)',
175 ' ON fileindex (pathid, revnum)',
172 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
176 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
173 # Provide a view over all file data for convenience.
177 # Provide a view over all file data for convenience.
174 'CREATE VIEW filedata AS '
178 'CREATE VIEW filedata AS '
175 'SELECT '
179 'SELECT '
176 ' fileindex.id AS id, '
180 ' fileindex.id AS id, '
177 ' filepath.id AS pathid, '
181 ' filepath.id AS pathid, '
178 ' filepath.path AS path, '
182 ' filepath.path AS path, '
179 ' fileindex.revnum AS revnum, '
183 ' fileindex.revnum AS revnum, '
180 ' fileindex.node AS node, '
184 ' fileindex.node AS node, '
181 ' fileindex.p1rev AS p1rev, '
185 ' fileindex.p1rev AS p1rev, '
182 ' fileindex.p2rev AS p2rev, '
186 ' fileindex.p2rev AS p2rev, '
183 ' fileindex.linkrev AS linkrev, '
187 ' fileindex.linkrev AS linkrev, '
184 ' fileindex.flags AS flags, '
188 ' fileindex.flags AS flags, '
185 ' fileindex.deltaid AS deltaid, '
189 ' fileindex.deltaid AS deltaid, '
186 ' fileindex.deltabaseid AS deltabaseid '
190 ' fileindex.deltabaseid AS deltabaseid '
187 'FROM filepath, fileindex '
191 'FROM filepath, fileindex '
188 'WHERE fileindex.pathid=filepath.id',
192 'WHERE fileindex.pathid=filepath.id',
189 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
193 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
190 ]
194 ]
191
195
192
196
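As a standalone illustration of how the three tables relate (this is not part of the extension; it only assumes the CREATE_SCHEMA and COMPRESSION_NONE names defined above are in scope), one path row, one content-addressed delta blob, and the fileindex row tying them together can be created in a throwaway in-memory database:

    import sqlite3

    db = sqlite3.connect(':memory:')
    for stmt in CREATE_SCHEMA:
        db.execute(stmt)

    # One tracked path; 'foo.txt' is purely illustrative.
    pathid = db.execute(
        'INSERT INTO filepath (path) VALUES (?)', (b'foo.txt',)
    ).lastrowid

    # The full text of revision 0, stored uncompressed and keyed by a
    # (here fake) content hash.
    deltaid = db.execute(
        'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
        (COMPRESSION_NONE, b'\x00' * 20, b'rev 0 full text'),
    ).lastrowid

    # The index row: revision 0, no parents; deltabaseid NULL marks the
    # start of the delta chain.
    db.execute(
        'INSERT INTO fileindex (pathid, revnum, p1rev, p2rev, linkrev, '
        ' flags, deltaid, deltabaseid, node) '
        'VALUES (?, 0, -1, -1, 0, 0, ?, NULL, ?)',
        (pathid, deltaid, b'\x11' * 20),
    )

    print(db.execute('SELECT path, revnum FROM filedata').fetchall())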
193 def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
197 def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
194 """Resolve a delta chain for a file node."""
198 """Resolve a delta chain for a file node."""
195
199
196 # TODO the "not in ({stops})" here is possibly slowing down the query
200 # TODO the "not in ({stops})" here is possibly slowing down the query
197 # because it needs to perform the lookup on every recursive invocation.
201 # because it needs to perform the lookup on every recursive invocation.
198 # This could possibly be faster if we created a temporary query with
202 # This could possibly be faster if we created a temporary query with
199 # baseid "poisoned" to null and limited the recursive filter to
203 # baseid "poisoned" to null and limited the recursive filter to
200 # "is not null".
204 # "is not null".
201 res = db.execute(
205 res = db.execute(
202 'WITH RECURSIVE '
206 'WITH RECURSIVE '
203 ' deltachain(deltaid, baseid) AS ('
207 ' deltachain(deltaid, baseid) AS ('
204 ' SELECT deltaid, deltabaseid FROM fileindex '
208 ' SELECT deltaid, deltabaseid FROM fileindex '
205 ' WHERE pathid=? AND node=? '
209 ' WHERE pathid=? AND node=? '
206 ' UNION ALL '
210 ' UNION ALL '
207 ' SELECT fileindex.deltaid, deltabaseid '
211 ' SELECT fileindex.deltaid, deltabaseid '
208 ' FROM fileindex, deltachain '
212 ' FROM fileindex, deltachain '
209 ' WHERE '
213 ' WHERE '
210 ' fileindex.id=deltachain.baseid '
214 ' fileindex.id=deltachain.baseid '
211 ' AND deltachain.baseid IS NOT NULL '
215 ' AND deltachain.baseid IS NOT NULL '
212 ' AND fileindex.id NOT IN ({stops}) '
216 ' AND fileindex.id NOT IN ({stops}) '
213 ' ) '
217 ' ) '
214 'SELECT deltachain.baseid, compression, delta '
218 'SELECT deltachain.baseid, compression, delta '
215 'FROM deltachain, delta '
219 'FROM deltachain, delta '
216 'WHERE delta.id=deltachain.deltaid'.format(
220 'WHERE delta.id=deltachain.deltaid'.format(
217 stops=','.join(['?'] * len(stoprids))
221 stops=','.join(['?'] * len(stoprids))
218 ),
222 ),
219 tuple([pathid, node] + list(stoprids.keys())),
223 tuple([pathid, node] + list(stoprids.keys())),
220 )
224 )
221
225
222 deltas = []
226 deltas = []
223 lastdeltabaseid = None
227 lastdeltabaseid = None
224
228
225 for deltabaseid, compression, delta in res:
229 for deltabaseid, compression, delta in res:
226 lastdeltabaseid = deltabaseid
230 lastdeltabaseid = deltabaseid
227
231
228 if compression == COMPRESSION_ZSTD:
232 if compression == COMPRESSION_ZSTD:
229 delta = zstddctx.decompress(delta)
233 delta = zstddctx.decompress(delta)
230 elif compression == COMPRESSION_NONE:
234 elif compression == COMPRESSION_NONE:
231 delta = delta
235 delta = delta
232 elif compression == COMPRESSION_ZLIB:
236 elif compression == COMPRESSION_ZLIB:
233 delta = zlib.decompress(delta)
237 delta = zlib.decompress(delta)
234 else:
238 else:
235 raise SQLiteStoreError(
239 raise SQLiteStoreError(
236 b'unhandled compression type: %d' % compression
240 b'unhandled compression type: %d' % compression
237 )
241 )
238
242
239 deltas.append(delta)
243 deltas.append(delta)
240
244
241 if lastdeltabaseid in stoprids:
245 if lastdeltabaseid in stoprids:
242 basetext = revisioncache[stoprids[lastdeltabaseid]]
246 basetext = revisioncache[stoprids[lastdeltabaseid]]
243 else:
247 else:
244 basetext = deltas.pop()
248 basetext = deltas.pop()
245
249
246 deltas.reverse()
250 deltas.reverse()
247 fulltext = mdiff.patches(basetext, deltas)
251 fulltext = mdiff.patches(basetext, deltas)
248
252
249 # SQLite returns buffer instances for blob columns on Python 2. This
253 # SQLite returns buffer instances for blob columns on Python 2. This
250 # type can propagate through the delta application layer. Because
254 # type can propagate through the delta application layer. Because
251 # downstream callers assume revisions are bytes, cast as needed.
255 # downstream callers assume revisions are bytes, cast as needed.
252 if not isinstance(fulltext, bytes):
256 if not isinstance(fulltext, bytes):
253 fulltext = bytes(delta)
257 fulltext = bytes(delta)
254
258
255 return fulltext
259 return fulltext
256
260
257
261
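Conceptually, the recursive query plus the decompression loop above walk deltabaseid pointers from the requested revision back toward the chain base, stopping early at any revision whose fulltext is already cached. A pure-Python restatement of that traversal (a hypothetical helper over a dict-based index, not an API of this module):

    def walk_delta_chain(index, start_id, stop_ids):
        """index maps a fileindex id to (deltabaseid, delta bytes);
        stop_ids are ids whose fulltext is already cached."""
        current = start_id
        while current is not None and current not in stop_ids:
            baseid, delta = index[current]
            yield delta
            current = baseid

The collected deltas are then applied in reverse order onto the base text, which is what mdiff.patches() does in resolvedeltachain().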
258 def insertdelta(db, compression, hash, delta):
262 def insertdelta(db, compression, hash, delta):
259 try:
263 try:
260 return db.execute(
264 return db.execute(
261 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
265 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
262 (compression, hash, delta),
266 (compression, hash, delta),
263 ).lastrowid
267 ).lastrowid
264 except sqlite3.IntegrityError:
268 except sqlite3.IntegrityError:
265 return db.execute(
269 return db.execute(
266 'SELECT id FROM delta WHERE hash=?', (hash,)
270 'SELECT id FROM delta WHERE hash=?', (hash,)
267 ).fetchone()[0]
271 ).fetchone()[0]
268
272
269
273
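The UNIQUE constraint on delta.hash is what makes insertdelta() idempotent: a duplicate blob raises IntegrityError and the function falls back to looking up the existing row. A hedged illustration, assuming 'db' is a sqlite3 connection with the schema above already applied and using this module's hashutil import:

    payload = b'some delta bytes'
    h = hashutil.sha1(payload).digest()

    first = insertdelta(db, COMPRESSION_NONE, h, payload)
    again = insertdelta(db, COMPRESSION_NONE, h, payload)
    assert first == again  # both calls resolve to the same delta row id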
270 class SQLiteStoreError(error.StorageError):
274 class SQLiteStoreError(error.StorageError):
271 pass
275 pass
272
276
273
277
274 @attr.s
278 @attr.s
275 class revisionentry:
279 class revisionentry:
276 rid = attr.ib()
280 rid = attr.ib()
277 rev = attr.ib()
281 rev = attr.ib()
278 node = attr.ib()
282 node = attr.ib()
279 p1rev = attr.ib()
283 p1rev = attr.ib()
280 p2rev = attr.ib()
284 p2rev = attr.ib()
281 p1node = attr.ib()
285 p1node = attr.ib()
282 p2node = attr.ib()
286 p2node = attr.ib()
283 linkrev = attr.ib()
287 linkrev = attr.ib()
284 flags = attr.ib()
288 flags = attr.ib()
285
289
286
290
287 @interfaceutil.implementer(repository.irevisiondelta)
291 @interfaceutil.implementer(repository.irevisiondelta)
288 @attr.s(slots=True)
292 @attr.s(slots=True)
289 class sqliterevisiondelta:
293 class sqliterevisiondelta:
290 node = attr.ib()
294 node = attr.ib()
291 p1node = attr.ib()
295 p1node = attr.ib()
292 p2node = attr.ib()
296 p2node = attr.ib()
293 basenode = attr.ib()
297 basenode = attr.ib()
294 flags = attr.ib()
298 flags = attr.ib()
295 baserevisionsize = attr.ib()
299 baserevisionsize = attr.ib()
296 revision = attr.ib()
300 revision = attr.ib()
297 delta = attr.ib()
301 delta = attr.ib()
298 sidedata = attr.ib()
302 sidedata = attr.ib()
299 protocol_flags = attr.ib()
303 protocol_flags = attr.ib()
300 linknode = attr.ib(default=None)
304 linknode = attr.ib(default=None)
301
305
302
306
303 @interfaceutil.implementer(repository.iverifyproblem)
304 @attr.s(frozen=True)
307 @attr.s(frozen=True)
305 class sqliteproblem:
308 class sqliteproblem(repository.iverifyproblem):
306 warning = attr.ib(default=None)
309 warning = attr.ib(default=None, type=Optional[bytes])
307 error = attr.ib(default=None)
310 error = attr.ib(default=None, type=Optional[bytes])
308 node = attr.ib(default=None)
311 node = attr.ib(default=None, type=Optional[bytes])
309
312
310
313
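This hunk is the heart of the changeset: sqliteproblem now subclasses repository.iverifyproblem directly, described in the commit message as a Protocol class, instead of advertising conformance via @interfaceutil.implementer, and the attrs fields gain Optional[bytes] annotations (hence the new typing import earlier in the diff). A rough standalone sketch of the same pattern using typing.Protocol, with illustrative names rather than Mercurial's actual interface definitions:

    from typing import Optional, Protocol


    class IVerifyProblem(Protocol):
        """Stand-in for repository.iverifyproblem."""

        warning: Optional[bytes]
        error: Optional[bytes]
        node: Optional[bytes]


    class Problem(IVerifyProblem):
        # Explicitly subclassing the Protocol documents intent and lets a
        # type checker confirm the attributes really are provided.
        def __init__(self, warning=None, error=None, node=None):
            self.warning = warning
            self.error = error
            self.node = node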
311 @interfaceutil.implementer(repository.ifilestorage)
314 @interfaceutil.implementer(repository.ifilestorage)
312 class sqlitefilestore:
315 class sqlitefilestore:
313 """Implements storage for an individual tracked path."""
316 """Implements storage for an individual tracked path."""
314
317
315 def __init__(self, db, path, compression):
318 def __init__(self, db, path, compression):
316 self.nullid = sha1nodeconstants.nullid
319 self.nullid = sha1nodeconstants.nullid
317 self._db = db
320 self._db = db
318 self._path = path
321 self._path = path
319
322
320 self._pathid = None
323 self._pathid = None
321
324
322 # revnum -> node
325 # revnum -> node
323 self._revtonode = {}
326 self._revtonode = {}
324 # node -> revnum
327 # node -> revnum
325 self._nodetorev = {}
328 self._nodetorev = {}
326 # node -> data structure
329 # node -> data structure
327 self._revisions = {}
330 self._revisions = {}
328
331
329 self._revisioncache = util.lrucachedict(10)
332 self._revisioncache = util.lrucachedict(10)
330
333
331 self._compengine = compression
334 self._compengine = compression
332
335
333 if compression == b'zstd':
336 if compression == b'zstd':
334 self._cctx = zstd.ZstdCompressor(level=3)
337 self._cctx = zstd.ZstdCompressor(level=3)
335 self._dctx = zstd.ZstdDecompressor()
338 self._dctx = zstd.ZstdDecompressor()
336 else:
339 else:
337 self._cctx = None
340 self._cctx = None
338 self._dctx = None
341 self._dctx = None
339
342
340 self._refreshindex()
343 self._refreshindex()
341
344
342 def _refreshindex(self):
345 def _refreshindex(self):
343 self._revtonode = {}
346 self._revtonode = {}
344 self._nodetorev = {}
347 self._nodetorev = {}
345 self._revisions = {}
348 self._revisions = {}
346
349
347 res = list(
350 res = list(
348 self._db.execute(
351 self._db.execute(
349 'SELECT id FROM filepath WHERE path=?', (self._path,)
352 'SELECT id FROM filepath WHERE path=?', (self._path,)
350 )
353 )
351 )
354 )
352
355
353 if not res:
356 if not res:
354 self._pathid = None
357 self._pathid = None
355 return
358 return
356
359
357 self._pathid = res[0][0]
360 self._pathid = res[0][0]
358
361
359 res = self._db.execute(
362 res = self._db.execute(
360 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
363 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
361 'FROM fileindex '
364 'FROM fileindex '
362 'WHERE pathid=? '
365 'WHERE pathid=? '
363 'ORDER BY revnum ASC',
366 'ORDER BY revnum ASC',
364 (self._pathid,),
367 (self._pathid,),
365 )
368 )
366
369
367 for i, row in enumerate(res):
370 for i, row in enumerate(res):
368 rid, rev, node, p1rev, p2rev, linkrev, flags = row
371 rid, rev, node, p1rev, p2rev, linkrev, flags = row
369
372
370 if i != rev:
373 if i != rev:
371 raise SQLiteStoreError(
374 raise SQLiteStoreError(
372 _(b'sqlite database has inconsistent revision numbers')
375 _(b'sqlite database has inconsistent revision numbers')
373 )
376 )
374
377
375 if p1rev == nullrev:
378 if p1rev == nullrev:
376 p1node = sha1nodeconstants.nullid
379 p1node = sha1nodeconstants.nullid
377 else:
380 else:
378 p1node = self._revtonode[p1rev]
381 p1node = self._revtonode[p1rev]
379
382
380 if p2rev == nullrev:
383 if p2rev == nullrev:
381 p2node = sha1nodeconstants.nullid
384 p2node = sha1nodeconstants.nullid
382 else:
385 else:
383 p2node = self._revtonode[p2rev]
386 p2node = self._revtonode[p2rev]
384
387
385 entry = revisionentry(
388 entry = revisionentry(
386 rid=rid,
389 rid=rid,
387 rev=rev,
390 rev=rev,
388 node=node,
391 node=node,
389 p1rev=p1rev,
392 p1rev=p1rev,
390 p2rev=p2rev,
393 p2rev=p2rev,
391 p1node=p1node,
394 p1node=p1node,
392 p2node=p2node,
395 p2node=p2node,
393 linkrev=linkrev,
396 linkrev=linkrev,
394 flags=flags,
397 flags=flags,
395 )
398 )
396
399
397 self._revtonode[rev] = node
400 self._revtonode[rev] = node
398 self._nodetorev[node] = rev
401 self._nodetorev[node] = rev
399 self._revisions[node] = entry
402 self._revisions[node] = entry
400
403
401 # Start of ifileindex interface.
404 # Start of ifileindex interface.
402
405
403 def __len__(self):
406 def __len__(self):
404 return len(self._revisions)
407 return len(self._revisions)
405
408
406 def __iter__(self):
409 def __iter__(self):
407 return iter(range(len(self._revisions)))
410 return iter(range(len(self._revisions)))
408
411
409 def hasnode(self, node):
412 def hasnode(self, node):
410 if node == sha1nodeconstants.nullid:
413 if node == sha1nodeconstants.nullid:
411 return False
414 return False
412
415
413 return node in self._nodetorev
416 return node in self._nodetorev
414
417
415 def revs(self, start=0, stop=None):
418 def revs(self, start=0, stop=None):
416 return storageutil.iterrevs(
419 return storageutil.iterrevs(
417 len(self._revisions), start=start, stop=stop
420 len(self._revisions), start=start, stop=stop
418 )
421 )
419
422
420 def parents(self, node):
423 def parents(self, node):
421 if node == sha1nodeconstants.nullid:
424 if node == sha1nodeconstants.nullid:
422 return sha1nodeconstants.nullid, sha1nodeconstants.nullid
425 return sha1nodeconstants.nullid, sha1nodeconstants.nullid
423
426
424 if node not in self._revisions:
427 if node not in self._revisions:
425 raise error.LookupError(node, self._path, _(b'no node'))
428 raise error.LookupError(node, self._path, _(b'no node'))
426
429
427 entry = self._revisions[node]
430 entry = self._revisions[node]
428 return entry.p1node, entry.p2node
431 return entry.p1node, entry.p2node
429
432
430 def parentrevs(self, rev):
433 def parentrevs(self, rev):
431 if rev == nullrev:
434 if rev == nullrev:
432 return nullrev, nullrev
435 return nullrev, nullrev
433
436
434 if rev not in self._revtonode:
437 if rev not in self._revtonode:
435 raise IndexError(rev)
438 raise IndexError(rev)
436
439
437 entry = self._revisions[self._revtonode[rev]]
440 entry = self._revisions[self._revtonode[rev]]
438 return entry.p1rev, entry.p2rev
441 return entry.p1rev, entry.p2rev
439
442
440 def ancestors(self, revs, stoprev=0, inclusive=False):
443 def ancestors(self, revs, stoprev=0, inclusive=False):
441 """Generate the ancestors of 'revs' in reverse revision order.
444 """Generate the ancestors of 'revs' in reverse revision order.
442 Does not generate revs lower than stoprev.
445 Does not generate revs lower than stoprev.
443
446
444 See the documentation for ancestor.lazyancestors for more details."""
447 See the documentation for ancestor.lazyancestors for more details."""
445
448
446 # first, make sure start revisions aren't filtered
449 # first, make sure start revisions aren't filtered
447 revs = list(revs)
450 revs = list(revs)
448 checkrev = self.node
451 checkrev = self.node
449 for r in revs:
452 for r in revs:
450 checkrev(r)
453 checkrev(r)
451
454
452 return ancestor.lazyancestors(
455 return ancestor.lazyancestors(
453 self.parentrevs,
456 self.parentrevs,
454 revs,
457 revs,
455 stoprev=stoprev,
458 stoprev=stoprev,
456 inclusive=inclusive,
459 inclusive=inclusive,
457 )
460 )
458
461
459 def rev(self, node):
462 def rev(self, node):
460 if node == sha1nodeconstants.nullid:
463 if node == sha1nodeconstants.nullid:
461 return nullrev
464 return nullrev
462
465
463 if node not in self._nodetorev:
466 if node not in self._nodetorev:
464 raise error.LookupError(node, self._path, _(b'no node'))
467 raise error.LookupError(node, self._path, _(b'no node'))
465
468
466 return self._nodetorev[node]
469 return self._nodetorev[node]
467
470
468 def node(self, rev):
471 def node(self, rev):
469 if rev == nullrev:
472 if rev == nullrev:
470 return sha1nodeconstants.nullid
473 return sha1nodeconstants.nullid
471
474
472 if rev not in self._revtonode:
475 if rev not in self._revtonode:
473 raise IndexError(rev)
476 raise IndexError(rev)
474
477
475 return self._revtonode[rev]
478 return self._revtonode[rev]
476
479
477 def lookup(self, node):
480 def lookup(self, node):
478 return storageutil.fileidlookup(self, node, self._path)
481 return storageutil.fileidlookup(self, node, self._path)
479
482
480 def linkrev(self, rev):
483 def linkrev(self, rev):
481 if rev == nullrev:
484 if rev == nullrev:
482 return nullrev
485 return nullrev
483
486
484 if rev not in self._revtonode:
487 if rev not in self._revtonode:
485 raise IndexError(rev)
488 raise IndexError(rev)
486
489
487 entry = self._revisions[self._revtonode[rev]]
490 entry = self._revisions[self._revtonode[rev]]
488 return entry.linkrev
491 return entry.linkrev
489
492
490 def iscensored(self, rev):
493 def iscensored(self, rev):
491 if rev == nullrev:
494 if rev == nullrev:
492 return False
495 return False
493
496
494 if rev not in self._revtonode:
497 if rev not in self._revtonode:
495 raise IndexError(rev)
498 raise IndexError(rev)
496
499
497 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
500 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
498
501
499 def commonancestorsheads(self, node1, node2):
502 def commonancestorsheads(self, node1, node2):
500 rev1 = self.rev(node1)
503 rev1 = self.rev(node1)
501 rev2 = self.rev(node2)
504 rev2 = self.rev(node2)
502
505
503 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
506 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
504 return pycompat.maplist(self.node, ancestors)
507 return pycompat.maplist(self.node, ancestors)
505
508
506 def descendants(self, revs):
509 def descendants(self, revs):
507 # TODO we could implement this using a recursive SQL query, which
510 # TODO we could implement this using a recursive SQL query, which
508 # might be faster.
511 # might be faster.
509 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
512 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
510
513
511 def heads(self, start=None, stop=None):
514 def heads(self, start=None, stop=None):
512 if start is None and stop is None:
515 if start is None and stop is None:
513 if not len(self):
516 if not len(self):
514 return [sha1nodeconstants.nullid]
517 return [sha1nodeconstants.nullid]
515
518
516 startrev = self.rev(start) if start is not None else nullrev
519 startrev = self.rev(start) if start is not None else nullrev
517 stoprevs = {self.rev(n) for n in stop or []}
520 stoprevs = {self.rev(n) for n in stop or []}
518
521
519 revs = dagop.headrevssubset(
522 revs = dagop.headrevssubset(
520 self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
523 self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
521 )
524 )
522
525
523 return [self.node(rev) for rev in revs]
526 return [self.node(rev) for rev in revs]
524
527
525 def children(self, node):
528 def children(self, node):
526 rev = self.rev(node)
529 rev = self.rev(node)
527
530
528 res = self._db.execute(
531 res = self._db.execute(
529 'SELECT'
532 'SELECT'
530 ' node '
533 ' node '
531 ' FROM filedata '
534 ' FROM filedata '
532 ' WHERE path=? AND (p1rev=? OR p2rev=?) '
535 ' WHERE path=? AND (p1rev=? OR p2rev=?) '
533 ' ORDER BY revnum ASC',
536 ' ORDER BY revnum ASC',
534 (self._path, rev, rev),
537 (self._path, rev, rev),
535 )
538 )
536
539
537 return [row[0] for row in res]
540 return [row[0] for row in res]
538
541
539 # End of ifileindex interface.
542 # End of ifileindex interface.
540
543
541 # Start of ifiledata interface.
544 # Start of ifiledata interface.
542
545
543 def size(self, rev):
546 def size(self, rev):
544 if rev == nullrev:
547 if rev == nullrev:
545 return 0
548 return 0
546
549
547 if rev not in self._revtonode:
550 if rev not in self._revtonode:
548 raise IndexError(rev)
551 raise IndexError(rev)
549
552
550 node = self._revtonode[rev]
553 node = self._revtonode[rev]
551
554
552 if self.renamed(node):
555 if self.renamed(node):
553 return len(self.read(node))
556 return len(self.read(node))
554
557
555 return len(self.revision(node))
558 return len(self.revision(node))
556
559
557 def revision(self, node, raw=False, _verifyhash=True):
560 def revision(self, node, raw=False, _verifyhash=True):
558 if node in (sha1nodeconstants.nullid, nullrev):
561 if node in (sha1nodeconstants.nullid, nullrev):
559 return b''
562 return b''
560
563
561 if isinstance(node, int):
564 if isinstance(node, int):
562 node = self.node(node)
565 node = self.node(node)
563
566
564 if node not in self._nodetorev:
567 if node not in self._nodetorev:
565 raise error.LookupError(node, self._path, _(b'no node'))
568 raise error.LookupError(node, self._path, _(b'no node'))
566
569
567 if node in self._revisioncache:
570 if node in self._revisioncache:
568 return self._revisioncache[node]
571 return self._revisioncache[node]
569
572
570 # Because we have a fulltext revision cache, we are able to
573 # Because we have a fulltext revision cache, we are able to
571 # short-circuit delta chain traversal and decompression as soon as
574 # short-circuit delta chain traversal and decompression as soon as
572 # we encounter a revision in the cache.
575 # we encounter a revision in the cache.
573
576
574 stoprids = {self._revisions[n].rid: n for n in self._revisioncache}
577 stoprids = {self._revisions[n].rid: n for n in self._revisioncache}
575
578
576 if not stoprids:
579 if not stoprids:
577 stoprids[-1] = None
580 stoprids[-1] = None
578
581
579 fulltext = resolvedeltachain(
582 fulltext = resolvedeltachain(
580 self._db,
583 self._db,
581 self._pathid,
584 self._pathid,
582 node,
585 node,
583 self._revisioncache,
586 self._revisioncache,
584 stoprids,
587 stoprids,
585 zstddctx=self._dctx,
588 zstddctx=self._dctx,
586 )
589 )
587
590
588 # Don't verify hashes if parent nodes were rewritten, as the hash
591 # Don't verify hashes if parent nodes were rewritten, as the hash
589 # wouldn't verify.
592 # wouldn't verify.
590 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
593 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
591 _verifyhash = False
594 _verifyhash = False
592
595
593 if _verifyhash:
596 if _verifyhash:
594 self._checkhash(fulltext, node)
597 self._checkhash(fulltext, node)
595 self._revisioncache[node] = fulltext
598 self._revisioncache[node] = fulltext
596
599
597 return fulltext
600 return fulltext
598
601
599 def rawdata(self, *args, **kwargs):
602 def rawdata(self, *args, **kwargs):
600 return self.revision(*args, **kwargs)
603 return self.revision(*args, **kwargs)
601
604
602 def read(self, node):
605 def read(self, node):
603 return storageutil.filtermetadata(self.revision(node))
606 return storageutil.filtermetadata(self.revision(node))
604
607
605 def renamed(self, node):
608 def renamed(self, node):
606 return storageutil.filerevisioncopied(self, node)
609 return storageutil.filerevisioncopied(self, node)
607
610
608 def cmp(self, node, fulltext):
611 def cmp(self, node, fulltext):
609 return not storageutil.filedataequivalent(self, node, fulltext)
612 return not storageutil.filedataequivalent(self, node, fulltext)
610
613
611 def emitrevisions(
614 def emitrevisions(
612 self,
615 self,
613 nodes,
616 nodes,
614 nodesorder=None,
617 nodesorder=None,
615 revisiondata=False,
618 revisiondata=False,
616 assumehaveparentrevisions=False,
619 assumehaveparentrevisions=False,
617 deltamode=repository.CG_DELTAMODE_STD,
620 deltamode=repository.CG_DELTAMODE_STD,
618 sidedata_helpers=None,
621 sidedata_helpers=None,
619 debug_info=None,
622 debug_info=None,
620 ):
623 ):
621 if nodesorder not in (b'nodes', b'storage', b'linear', None):
624 if nodesorder not in (b'nodes', b'storage', b'linear', None):
622 raise error.ProgrammingError(
625 raise error.ProgrammingError(
623 b'unhandled value for nodesorder: %s' % nodesorder
626 b'unhandled value for nodesorder: %s' % nodesorder
624 )
627 )
625
628
626 nodes = [n for n in nodes if n != sha1nodeconstants.nullid]
629 nodes = [n for n in nodes if n != sha1nodeconstants.nullid]
627
630
628 if not nodes:
631 if not nodes:
629 return
632 return
630
633
631 # TODO perform in a single query.
634 # TODO perform in a single query.
632 res = self._db.execute(
635 res = self._db.execute(
633 'SELECT revnum, deltaid FROM fileindex '
636 'SELECT revnum, deltaid FROM fileindex '
634 'WHERE pathid=? '
637 'WHERE pathid=? '
635 ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
638 ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
636 tuple([self._pathid] + nodes),
639 tuple([self._pathid] + nodes),
637 )
640 )
638
641
639 deltabases = {}
642 deltabases = {}
640
643
641 for rev, deltaid in res:
644 for rev, deltaid in res:
642 res = self._db.execute(
645 res = self._db.execute(
643 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
646 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
644 (self._pathid, deltaid),
647 (self._pathid, deltaid),
645 )
648 )
646 deltabases[rev] = res.fetchone()[0]
649 deltabases[rev] = res.fetchone()[0]
647
650
648 # TODO define revdifffn so we can use delta from storage.
651 # TODO define revdifffn so we can use delta from storage.
649 for delta in storageutil.emitrevisions(
652 for delta in storageutil.emitrevisions(
650 self,
653 self,
651 nodes,
654 nodes,
652 nodesorder,
655 nodesorder,
653 sqliterevisiondelta,
656 sqliterevisiondelta,
654 deltaparentfn=deltabases.__getitem__,
657 deltaparentfn=deltabases.__getitem__,
655 revisiondata=revisiondata,
658 revisiondata=revisiondata,
656 assumehaveparentrevisions=assumehaveparentrevisions,
659 assumehaveparentrevisions=assumehaveparentrevisions,
657 deltamode=deltamode,
660 deltamode=deltamode,
658 sidedata_helpers=sidedata_helpers,
661 sidedata_helpers=sidedata_helpers,
659 ):
662 ):
660 yield delta
663 yield delta
661
664
662 # End of ifiledata interface.
665 # End of ifiledata interface.
663
666
664 # Start of ifilemutation interface.
667 # Start of ifilemutation interface.
665
668
666 def add(self, filedata, meta, transaction, linkrev, p1, p2):
669 def add(self, filedata, meta, transaction, linkrev, p1, p2):
667 if meta or filedata.startswith(b'\x01\n'):
670 if meta or filedata.startswith(b'\x01\n'):
668 filedata = storageutil.packmeta(meta, filedata)
671 filedata = storageutil.packmeta(meta, filedata)
669
672
670 rev = self.addrevision(filedata, transaction, linkrev, p1, p2)
673 rev = self.addrevision(filedata, transaction, linkrev, p1, p2)
671 return self.node(rev)
674 return self.node(rev)
672
675
673 def addrevision(
676 def addrevision(
674 self,
677 self,
675 revisiondata,
678 revisiondata,
676 transaction,
679 transaction,
677 linkrev,
680 linkrev,
678 p1,
681 p1,
679 p2,
682 p2,
680 node=None,
683 node=None,
681 flags=0,
684 flags=0,
682 cachedelta=None,
685 cachedelta=None,
683 ):
686 ):
684 if flags:
687 if flags:
685 raise SQLiteStoreError(_(b'flags not supported on revisions'))
688 raise SQLiteStoreError(_(b'flags not supported on revisions'))
686
689
687 validatehash = node is not None
690 validatehash = node is not None
688 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
691 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
689
692
690 if validatehash:
693 if validatehash:
691 self._checkhash(revisiondata, node, p1, p2)
694 self._checkhash(revisiondata, node, p1, p2)
692
695
693 rev = self._nodetorev.get(node)
696 rev = self._nodetorev.get(node)
694 if rev is not None:
697 if rev is not None:
695 return rev
698 return rev
696
699
697 rev = self._addrawrevision(
700 rev = self._addrawrevision(
698 node, revisiondata, transaction, linkrev, p1, p2
701 node, revisiondata, transaction, linkrev, p1, p2
699 )
702 )
700
703
701 self._revisioncache[node] = revisiondata
704 self._revisioncache[node] = revisiondata
702 return rev
705 return rev
703
706
704 def addgroup(
707 def addgroup(
705 self,
708 self,
706 deltas,
709 deltas,
707 linkmapper,
710 linkmapper,
708 transaction,
711 transaction,
709 addrevisioncb=None,
712 addrevisioncb=None,
710 duplicaterevisioncb=None,
713 duplicaterevisioncb=None,
711 maybemissingparents=False,
714 maybemissingparents=False,
712 ):
715 ):
713 empty = True
716 empty = True
714
717
715 for (
718 for (
716 node,
719 node,
717 p1,
720 p1,
718 p2,
721 p2,
719 linknode,
722 linknode,
720 deltabase,
723 deltabase,
721 delta,
724 delta,
722 wireflags,
725 wireflags,
723 sidedata,
726 sidedata,
724 ) in deltas:
727 ) in deltas:
725 storeflags = 0
728 storeflags = 0
726
729
727 if wireflags & repository.REVISION_FLAG_CENSORED:
730 if wireflags & repository.REVISION_FLAG_CENSORED:
728 storeflags |= FLAG_CENSORED
731 storeflags |= FLAG_CENSORED
729
732
730 if wireflags & ~repository.REVISION_FLAG_CENSORED:
733 if wireflags & ~repository.REVISION_FLAG_CENSORED:
731 raise SQLiteStoreError(b'unhandled revision flag')
734 raise SQLiteStoreError(b'unhandled revision flag')
732
735
733 if maybemissingparents:
736 if maybemissingparents:
734 if p1 != sha1nodeconstants.nullid and not self.hasnode(p1):
737 if p1 != sha1nodeconstants.nullid and not self.hasnode(p1):
735 p1 = sha1nodeconstants.nullid
738 p1 = sha1nodeconstants.nullid
736 storeflags |= FLAG_MISSING_P1
739 storeflags |= FLAG_MISSING_P1
737
740
738 if p2 != sha1nodeconstants.nullid and not self.hasnode(p2):
741 if p2 != sha1nodeconstants.nullid and not self.hasnode(p2):
739 p2 = sha1nodeconstants.nullid
742 p2 = sha1nodeconstants.nullid
740 storeflags |= FLAG_MISSING_P2
743 storeflags |= FLAG_MISSING_P2
741
744
742 baserev = self.rev(deltabase)
745 baserev = self.rev(deltabase)
743
746
744 # If base is censored, delta must be full replacement in a single
747 # If base is censored, delta must be full replacement in a single
745 # patch operation.
748 # patch operation.
746 if baserev != nullrev and self.iscensored(baserev):
749 if baserev != nullrev and self.iscensored(baserev):
747 hlen = struct.calcsize(b'>lll')
750 hlen = struct.calcsize(b'>lll')
748 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
751 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
749 newlen = len(delta) - hlen
752 newlen = len(delta) - hlen
750
753
751 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
754 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
752 raise error.CensoredBaseError(self._path, deltabase)
755 raise error.CensoredBaseError(self._path, deltabase)
753
756
754 if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
757 if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
755 delta, baserev, lambda x: len(self.rawdata(x))
758 delta, baserev, lambda x: len(self.rawdata(x))
756 ):
759 ):
757 storeflags |= FLAG_CENSORED
760 storeflags |= FLAG_CENSORED
758
761
759 linkrev = linkmapper(linknode)
762 linkrev = linkmapper(linknode)
760
763
761 if node in self._revisions:
764 if node in self._revisions:
762 # Possibly reset parents to make them proper.
765 # Possibly reset parents to make them proper.
763 entry = self._revisions[node]
766 entry = self._revisions[node]
764
767
765 if (
768 if (
766 entry.flags & FLAG_MISSING_P1
769 entry.flags & FLAG_MISSING_P1
767 and p1 != sha1nodeconstants.nullid
770 and p1 != sha1nodeconstants.nullid
768 ):
771 ):
769 entry.p1node = p1
772 entry.p1node = p1
770 entry.p1rev = self._nodetorev[p1]
773 entry.p1rev = self._nodetorev[p1]
771 entry.flags &= ~FLAG_MISSING_P1
774 entry.flags &= ~FLAG_MISSING_P1
772
775
773 self._db.execute(
776 self._db.execute(
774 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
777 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
775 (self._nodetorev[p1], entry.flags, entry.rid),
778 (self._nodetorev[p1], entry.flags, entry.rid),
776 )
779 )
777
780
778 if (
781 if (
779 entry.flags & FLAG_MISSING_P2
782 entry.flags & FLAG_MISSING_P2
780 and p2 != sha1nodeconstants.nullid
783 and p2 != sha1nodeconstants.nullid
781 ):
784 ):
782 entry.p2node = p2
785 entry.p2node = p2
783 entry.p2rev = self._nodetorev[p2]
786 entry.p2rev = self._nodetorev[p2]
784 entry.flags &= ~FLAG_MISSING_P2
787 entry.flags &= ~FLAG_MISSING_P2
785
788
786 self._db.execute(
789 self._db.execute(
787 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
790 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
788 (self._nodetorev[p1], entry.flags, entry.rid),
791 (self._nodetorev[p1], entry.flags, entry.rid),
789 )
792 )
790
793
791 if duplicaterevisioncb:
794 if duplicaterevisioncb:
792 duplicaterevisioncb(self, self.rev(node))
795 duplicaterevisioncb(self, self.rev(node))
793 empty = False
796 empty = False
794 continue
797 continue
795
798
796 if deltabase == sha1nodeconstants.nullid:
799 if deltabase == sha1nodeconstants.nullid:
797 text = mdiff.patch(b'', delta)
800 text = mdiff.patch(b'', delta)
798 storedelta = None
801 storedelta = None
799 else:
802 else:
800 text = None
803 text = None
801 storedelta = (deltabase, delta)
804 storedelta = (deltabase, delta)
802
805
803 rev = self._addrawrevision(
806 rev = self._addrawrevision(
804 node,
807 node,
805 text,
808 text,
806 transaction,
809 transaction,
807 linkrev,
810 linkrev,
808 p1,
811 p1,
809 p2,
812 p2,
810 storedelta=storedelta,
813 storedelta=storedelta,
811 flags=storeflags,
814 flags=storeflags,
812 )
815 )
813
816
814 if addrevisioncb:
817 if addrevisioncb:
815 addrevisioncb(self, rev)
818 addrevisioncb(self, rev)
816 empty = False
819 empty = False
817
820
818 return not empty
821 return not empty
819
822
820 def censorrevision(self, tr, censor_nodes, tombstone=b''):
823 def censorrevision(self, tr, censor_nodes, tombstone=b''):
821 for node in censor_nodes:
824 for node in censor_nodes:
822 self._censor_one_revision(tr, node, tombstone=tombstone)
825 self._censor_one_revision(tr, node, tombstone=tombstone)
823
826
824 def _censor_one_revision(self, tr, censornode, tombstone):
827 def _censor_one_revision(self, tr, censornode, tombstone):
825 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
828 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
826
829
827 # This restriction is cargo culted from revlogs and makes no sense for
830 # This restriction is cargo culted from revlogs and makes no sense for
828 # SQLite, since columns can be resized at will.
831 # SQLite, since columns can be resized at will.
829 if len(tombstone) > len(self.rawdata(censornode)):
832 if len(tombstone) > len(self.rawdata(censornode)):
830 raise error.Abort(
833 raise error.Abort(
831 _(b'censor tombstone must be no longer than censored data')
834 _(b'censor tombstone must be no longer than censored data')
832 )
835 )
833
836
834 # We need to replace the censored revision's data with the tombstone.
837 # We need to replace the censored revision's data with the tombstone.
835 # But replacing that data will have implications for delta chains that
838 # But replacing that data will have implications for delta chains that
836 # reference it.
839 # reference it.
837 #
840 #
838 # While "better," more complex strategies are possible, we do something
841 # While "better," more complex strategies are possible, we do something
839 # simple: we find delta chain children of the censored revision and we
842 # simple: we find delta chain children of the censored revision and we
840 # replace those incremental deltas with fulltexts of their corresponding
843 # replace those incremental deltas with fulltexts of their corresponding
841 # revision. Then we delete the now-unreferenced delta and original
844 # revision. Then we delete the now-unreferenced delta and original
842 # revision and insert a replacement.
845 # revision and insert a replacement.
843
846
844 # Find the delta to be censored.
847 # Find the delta to be censored.
845 censoreddeltaid = self._db.execute(
848 censoreddeltaid = self._db.execute(
846 'SELECT deltaid FROM fileindex WHERE id=?',
849 'SELECT deltaid FROM fileindex WHERE id=?',
847 (self._revisions[censornode].rid,),
850 (self._revisions[censornode].rid,),
848 ).fetchone()[0]
851 ).fetchone()[0]
849
852
850 # Find all its delta chain children.
853 # Find all its delta chain children.
851 # TODO once we support storing deltas for !files, we'll need to look
854 # TODO once we support storing deltas for !files, we'll need to look
852 # for those delta chains too.
855 # for those delta chains too.
853 rows = list(
856 rows = list(
854 self._db.execute(
857 self._db.execute(
855 'SELECT id, pathid, node FROM fileindex '
858 'SELECT id, pathid, node FROM fileindex '
856 'WHERE deltabaseid=? OR deltaid=?',
859 'WHERE deltabaseid=? OR deltaid=?',
857 (censoreddeltaid, censoreddeltaid),
860 (censoreddeltaid, censoreddeltaid),
858 )
861 )
859 )
862 )
860
863
861 for row in rows:
864 for row in rows:
862 rid, pathid, node = row
865 rid, pathid, node = row
863
866
864 fulltext = resolvedeltachain(
867 fulltext = resolvedeltachain(
865 self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
868 self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
866 )
869 )
867
870
868 deltahash = hashutil.sha1(fulltext).digest()
871 deltahash = hashutil.sha1(fulltext).digest()
869
872
870 if self._compengine == b'zstd':
873 if self._compengine == b'zstd':
871 deltablob = self._cctx.compress(fulltext)
874 deltablob = self._cctx.compress(fulltext)
872 compression = COMPRESSION_ZSTD
875 compression = COMPRESSION_ZSTD
873 elif self._compengine == b'zlib':
876 elif self._compengine == b'zlib':
874 deltablob = zlib.compress(fulltext)
877 deltablob = zlib.compress(fulltext)
875 compression = COMPRESSION_ZLIB
878 compression = COMPRESSION_ZLIB
876 elif self._compengine == b'none':
879 elif self._compengine == b'none':
877 deltablob = fulltext
880 deltablob = fulltext
878 compression = COMPRESSION_NONE
881 compression = COMPRESSION_NONE
879 else:
882 else:
880 raise error.ProgrammingError(
883 raise error.ProgrammingError(
881 b'unhandled compression engine: %s' % self._compengine
884 b'unhandled compression engine: %s' % self._compengine
882 )
885 )
883
886
884 if len(deltablob) >= len(fulltext):
887 if len(deltablob) >= len(fulltext):
885 deltablob = fulltext
888 deltablob = fulltext
886 compression = COMPRESSION_NONE
889 compression = COMPRESSION_NONE
887
890
888 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
891 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
889
892
890 self._db.execute(
893 self._db.execute(
891 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
894 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
892 'WHERE id=?',
895 'WHERE id=?',
893 (deltaid, rid),
896 (deltaid, rid),
894 )
897 )
895
898
896 # Now create the tombstone delta and replace the delta on the censored
899 # Now create the tombstone delta and replace the delta on the censored
897 # node.
900 # node.
898 deltahash = hashutil.sha1(tombstone).digest()
901 deltahash = hashutil.sha1(tombstone).digest()
899 tombstonedeltaid = insertdelta(
902 tombstonedeltaid = insertdelta(
900 self._db, COMPRESSION_NONE, deltahash, tombstone
903 self._db, COMPRESSION_NONE, deltahash, tombstone
901 )
904 )
902
905
903 flags = self._revisions[censornode].flags
906 flags = self._revisions[censornode].flags
904 flags |= FLAG_CENSORED
907 flags |= FLAG_CENSORED
905
908
906 self._db.execute(
909 self._db.execute(
907 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
910 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
908 'WHERE pathid=? AND node=?',
911 'WHERE pathid=? AND node=?',
909 (flags, tombstonedeltaid, self._pathid, censornode),
912 (flags, tombstonedeltaid, self._pathid, censornode),
910 )
913 )
911
914
912 self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))
915 self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))
913
916
914 self._refreshindex()
917 self._refreshindex()
915 self._revisioncache.clear()
918 self._revisioncache.clear()
916
919
917 def getstrippoint(self, minlink):
920 def getstrippoint(self, minlink):
918 return storageutil.resolvestripinfo(
921 return storageutil.resolvestripinfo(
919 minlink,
922 minlink,
920 len(self) - 1,
923 len(self) - 1,
921 [self.rev(n) for n in self.heads()],
924 [self.rev(n) for n in self.heads()],
922 self.linkrev,
925 self.linkrev,
923 self.parentrevs,
926 self.parentrevs,
924 )
927 )
925
928
926 def strip(self, minlink, transaction):
929 def strip(self, minlink, transaction):
927 if not len(self):
930 if not len(self):
928 return
931 return
929
932
930 rev, _ignored = self.getstrippoint(minlink)
933 rev, _ignored = self.getstrippoint(minlink)
931
934
932 if rev == len(self):
935 if rev == len(self):
933 return
936 return
934
937
935 for rev in self.revs(rev):
938 for rev in self.revs(rev):
936 self._db.execute(
939 self._db.execute(
937 'DELETE FROM fileindex WHERE pathid=? AND node=?',
940 'DELETE FROM fileindex WHERE pathid=? AND node=?',
938 (self._pathid, self.node(rev)),
941 (self._pathid, self.node(rev)),
939 )
942 )
940
943
941 # TODO how should we garbage collect data in delta table?
944 # TODO how should we garbage collect data in delta table?
942
945
943 self._refreshindex()
946 self._refreshindex()
944
947
945 # End of ifilemutation interface.
948 # End of ifilemutation interface.
946
949
947 # Start of ifilestorage interface.
950 # Start of ifilestorage interface.
948
951
949 def files(self):
952 def files(self):
950 return []
953 return []
951
954
952 def sidedata(self, nodeorrev, _df=None):
955 def sidedata(self, nodeorrev, _df=None):
953 # Not supported for now
956 # Not supported for now
954 return {}
957 return {}
955
958
956 def storageinfo(
959 def storageinfo(
957 self,
960 self,
958 exclusivefiles=False,
961 exclusivefiles=False,
959 sharedfiles=False,
962 sharedfiles=False,
960 revisionscount=False,
963 revisionscount=False,
961 trackedsize=False,
964 trackedsize=False,
962 storedsize=False,
965 storedsize=False,
963 ):
966 ):
964 d = {}
967 d = {}
965
968
966 if exclusivefiles:
969 if exclusivefiles:
967 d[b'exclusivefiles'] = []
970 d[b'exclusivefiles'] = []
968
971
969 if sharedfiles:
972 if sharedfiles:
970 # TODO list sqlite file(s) here.
973 # TODO list sqlite file(s) here.
971 d[b'sharedfiles'] = []
974 d[b'sharedfiles'] = []
972
975
973 if revisionscount:
976 if revisionscount:
974 d[b'revisionscount'] = len(self)
977 d[b'revisionscount'] = len(self)
975
978
976 if trackedsize:
979 if trackedsize:
977 d[b'trackedsize'] = sum(
980 d[b'trackedsize'] = sum(
978 len(self.revision(node)) for node in self._nodetorev
981 len(self.revision(node)) for node in self._nodetorev
979 )
982 )
980
983
981 if storedsize:
984 if storedsize:
982 # TODO implement this?
985 # TODO implement this?
983 d[b'storedsize'] = None
986 d[b'storedsize'] = None
984
987
985 return d
988 return d
986
989
987 def verifyintegrity(self, state):
990 def verifyintegrity(self, state):
988 state[b'skipread'] = set()
991 state[b'skipread'] = set()
989
992
990 for rev in self:
993 for rev in self:
991 node = self.node(rev)
994 node = self.node(rev)
992
995
993 try:
996 try:
994 self.revision(node)
997 self.revision(node)
995 except Exception as e:
998 except Exception as e:
996 yield sqliteproblem(
999 yield sqliteproblem(
997 error=_(b'unpacking %s: %s') % (short(node), e), node=node
1000 error=_(b'unpacking %s: %s') % (short(node), e), node=node
998 )
1001 )
999
1002
1000 state[b'skipread'].add(node)
1003 state[b'skipread'].add(node)
1001
1004
1002 # End of ifilestorage interface.
1005 # End of ifilestorage interface.
1003
1006
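For context on how the problems yielded by verifyintegrity() above are meant to be consumed, a hedged sketch ('store' stands for a sqlitefilestore instance; only the attributes declared on sqliteproblem are used):

    state = {}
    for problem in store.verifyintegrity(state):
        if problem.error is not None:
            failing = short(problem.node)  # bytes, per the attrs declarations
            message = problem.error
    # afterwards, state[b'skipread'] holds the nodes whose data failed to
    # unpack, matching what the method records above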
1004 def _checkhash(self, fulltext, node, p1=None, p2=None):
1007 def _checkhash(self, fulltext, node, p1=None, p2=None):
1005 if p1 is None and p2 is None:
1008 if p1 is None and p2 is None:
1006 p1, p2 = self.parents(node)
1009 p1, p2 = self.parents(node)
1007
1010
1008 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
1011 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
1009 return
1012 return
1010
1013
1011 try:
1014 try:
1012 del self._revisioncache[node]
1015 del self._revisioncache[node]
1013 except KeyError:
1016 except KeyError:
1014 pass
1017 pass
1015
1018
1016 if storageutil.iscensoredtext(fulltext):
1019 if storageutil.iscensoredtext(fulltext):
1017 raise error.CensoredNodeError(self._path, node, fulltext)
1020 raise error.CensoredNodeError(self._path, node, fulltext)
1018
1021
1019 raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)
1022 raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)
1020
1023
1021 def _addrawrevision(
1024 def _addrawrevision(
1022 self,
1025 self,
1023 node,
1026 node,
1024 revisiondata,
1027 revisiondata,
1025 transaction,
1028 transaction,
1026 linkrev,
1029 linkrev,
1027 p1,
1030 p1,
1028 p2,
1031 p2,
1029 storedelta=None,
1032 storedelta=None,
1030 flags=0,
1033 flags=0,
1031 ):
1034 ):
1032 if self._pathid is None:
1035 if self._pathid is None:
1033 res = self._db.execute(
1036 res = self._db.execute(
1034 'INSERT INTO filepath (path) VALUES (?)', (self._path,)
1037 'INSERT INTO filepath (path) VALUES (?)', (self._path,)
1035 )
1038 )
1036 self._pathid = res.lastrowid
1039 self._pathid = res.lastrowid
1037
1040
1038 # For simplicity, always store a delta against p1.
1041 # For simplicity, always store a delta against p1.
1039 # TODO we need a lot more logic here to make behavior reasonable.
1042 # TODO we need a lot more logic here to make behavior reasonable.
1040
1043
1041 if storedelta:
1044 if storedelta:
1042 deltabase, delta = storedelta
1045 deltabase, delta = storedelta
1043
1046
1044 if isinstance(deltabase, int):
1047 if isinstance(deltabase, int):
1045 deltabase = self.node(deltabase)
1048 deltabase = self.node(deltabase)
1046
1049
1047 else:
1050 else:
1048 assert revisiondata is not None
1051 assert revisiondata is not None
1049 deltabase = p1
1052 deltabase = p1
1050
1053
1051 if deltabase == sha1nodeconstants.nullid:
1054 if deltabase == sha1nodeconstants.nullid:
1052 delta = revisiondata
1055 delta = revisiondata
1053 else:
1056 else:
1054 delta = mdiff.textdiff(
1057 delta = mdiff.textdiff(
1055 self.revision(self.rev(deltabase)), revisiondata
1058 self.revision(self.rev(deltabase)), revisiondata
1056 )
1059 )
1057
1060
1058 # File index stores a pointer to its delta and the parent delta.
1061 # File index stores a pointer to its delta and the parent delta.
1059 # The parent delta is stored via a pointer to the fileindex PK.
1062 # The parent delta is stored via a pointer to the fileindex PK.
1060 if deltabase == sha1nodeconstants.nullid:
1063 if deltabase == sha1nodeconstants.nullid:
1061 baseid = None
1064 baseid = None
1062 else:
1065 else:
1063 baseid = self._revisions[deltabase].rid
1066 baseid = self._revisions[deltabase].rid
1064
1067
1065 # Deltas are stored with a hash of their content. This allows
1068 # Deltas are stored with a hash of their content. This allows
1066 # us to de-duplicate. The table is configured to ignore conflicts,
1069 # us to de-duplicate. The table is configured to ignore conflicts,
1067 # and it is faster to just insert and silently no-op than to look
1070 # and it is faster to just insert and silently no-op than to look
1068 # it up first (see the sketch after this method).
1071 # it up first (see the sketch after this method).
1069 deltahash = hashutil.sha1(delta).digest()
1072 deltahash = hashutil.sha1(delta).digest()
1070
1073
1071 if self._compengine == b'zstd':
1074 if self._compengine == b'zstd':
1072 deltablob = self._cctx.compress(delta)
1075 deltablob = self._cctx.compress(delta)
1073 compression = COMPRESSION_ZSTD
1076 compression = COMPRESSION_ZSTD
1074 elif self._compengine == b'zlib':
1077 elif self._compengine == b'zlib':
1075 deltablob = zlib.compress(delta)
1078 deltablob = zlib.compress(delta)
1076 compression = COMPRESSION_ZLIB
1079 compression = COMPRESSION_ZLIB
1077 elif self._compengine == b'none':
1080 elif self._compengine == b'none':
1078 deltablob = delta
1081 deltablob = delta
1079 compression = COMPRESSION_NONE
1082 compression = COMPRESSION_NONE
1080 else:
1083 else:
1081 raise error.ProgrammingError(
1084 raise error.ProgrammingError(
1082 b'unhandled compression engine: %s' % self._compengine
1085 b'unhandled compression engine: %s' % self._compengine
1083 )
1086 )
1084
1087
1085 # Don't store compressed data if it isn't practical.
1088 # Don't store compressed data if it isn't practical.
1086 if len(deltablob) >= len(delta):
1089 if len(deltablob) >= len(delta):
1087 deltablob = delta
1090 deltablob = delta
1088 compression = COMPRESSION_NONE
1091 compression = COMPRESSION_NONE
1089
1092
1090 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1093 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1091
1094
1092 rev = len(self)
1095 rev = len(self)
1093
1096
1094 if p1 == sha1nodeconstants.nullid:
1097 if p1 == sha1nodeconstants.nullid:
1095 p1rev = nullrev
1098 p1rev = nullrev
1096 else:
1099 else:
1097 p1rev = self._nodetorev[p1]
1100 p1rev = self._nodetorev[p1]
1098
1101
1099 if p2 == sha1nodeconstants.nullid:
1102 if p2 == sha1nodeconstants.nullid:
1100 p2rev = nullrev
1103 p2rev = nullrev
1101 else:
1104 else:
1102 p2rev = self._nodetorev[p2]
1105 p2rev = self._nodetorev[p2]
1103
1106
1104 rid = self._db.execute(
1107 rid = self._db.execute(
1105 'INSERT INTO fileindex ('
1108 'INSERT INTO fileindex ('
1106 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1109 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1107 ' deltaid, deltabaseid) '
1110 ' deltaid, deltabaseid) '
1108 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1111 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1109 (
1112 (
1110 self._pathid,
1113 self._pathid,
1111 rev,
1114 rev,
1112 node,
1115 node,
1113 p1rev,
1116 p1rev,
1114 p2rev,
1117 p2rev,
1115 linkrev,
1118 linkrev,
1116 flags,
1119 flags,
1117 deltaid,
1120 deltaid,
1118 baseid,
1121 baseid,
1119 ),
1122 ),
1120 ).lastrowid
1123 ).lastrowid
1121
1124
1122 entry = revisionentry(
1125 entry = revisionentry(
1123 rid=rid,
1126 rid=rid,
1124 rev=rev,
1127 rev=rev,
1125 node=node,
1128 node=node,
1126 p1rev=p1rev,
1129 p1rev=p1rev,
1127 p2rev=p2rev,
1130 p2rev=p2rev,
1128 p1node=p1,
1131 p1node=p1,
1129 p2node=p2,
1132 p2node=p2,
1130 linkrev=linkrev,
1133 linkrev=linkrev,
1131 flags=flags,
1134 flags=flags,
1132 )
1135 )
1133
1136
1134 self._nodetorev[node] = rev
1137 self._nodetorev[node] = rev
1135 self._revtonode[rev] = node
1138 self._revtonode[rev] = node
1136 self._revisions[node] = entry
1139 self._revisions[node] = entry
1137
1140
1138 return rev
1141 return rev
1139
1142
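# A hedged sketch of the de-duplicating delta insert used by the method
# above. The real ``insertdelta`` helper and the exact ``delta`` table
# schema live elsewhere in this file, so the column names here are
# illustrative only; the point is the INSERT-then-SELECT pattern enabled by
# an "ignore conflicts" table.
def insertdelta_sketch(db, compression, deltahash, deltablob):
    # Insert is a silent no-op if a row with the same hash already exists.
    db.execute(
        'INSERT OR IGNORE INTO delta (compression, hash, delta) '
        'VALUES (?, ?, ?)',
        (compression, deltahash, deltablob),
    )
    # Either way, look up the (possibly pre-existing) row id.
    return db.execute(
        'SELECT id FROM delta WHERE hash=?', (deltahash,)
    ).fetchone()[0]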
1140
1143
1141 class sqliterepository(localrepo.localrepository):
1144 class sqliterepository(localrepo.localrepository):
1142 def cancopy(self):
1145 def cancopy(self):
1143 return False
1146 return False
1144
1147
1145 def transaction(self, *args, **kwargs):
1148 def transaction(self, *args, **kwargs):
1146 current = self.currenttransaction()
1149 current = self.currenttransaction()
1147
1150
1148 tr = super(sqliterepository, self).transaction(*args, **kwargs)
1151 tr = super(sqliterepository, self).transaction(*args, **kwargs)
1149
1152
1150 if current:
1153 if current:
1151 return tr
1154 return tr
1152
1155
1153 self._dbconn.execute('BEGIN TRANSACTION')
1156 self._dbconn.execute('BEGIN TRANSACTION')
1154
1157
1155 def committransaction(_):
1158 def committransaction(_):
1156 self._dbconn.commit()
1159 self._dbconn.commit()
1157
1160
1158 tr.addfinalize(b'sqlitestore', committransaction)
1161 tr.addfinalize(b'sqlitestore', committransaction)
1159
1162
1160 return tr
1163 return tr
1161
1164
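# A hedged note on the transaction wiring above: only the outermost
# Mercurial transaction issues BEGIN TRANSACTION (nested calls return early
# with the existing transaction), and the SQLite commit is deferred to the
# ``sqlitestore`` finalizer registered with ``tr.addfinalize()``, so it runs
# when the outer transaction closes successfully. Roughly:
#
#   tr = repo.transaction(b'example')   # BEGIN TRANSACTION issued once
#   ...                                 # file stores write via the shared db
#   tr.close()                          # finalizers run -> SQLite commit
#   tr.release()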
1162 @property
1165 @property
1163 def _dbconn(self):
1166 def _dbconn(self):
1164 # SQLite connections can only be used on the thread that created
1167 # SQLite connections can only be used on the thread that created
1165 # them. In most cases, this "just works." However, hgweb uses
1168 # them. In most cases, this "just works." However, hgweb uses
1166 # multiple threads.
1169 # multiple threads.
1167 tid = threading.current_thread().ident
1170 tid = threading.current_thread().ident
1168
1171
1169 if self._db:
1172 if self._db:
1170 if self._db[0] == tid:
1173 if self._db[0] == tid:
1171 return self._db[1]
1174 return self._db[1]
1172
1175
1173 db = makedb(self.svfs.join(b'db.sqlite'))
1176 db = makedb(self.svfs.join(b'db.sqlite'))
1174 self._db = (tid, db)
1177 self._db = (tid, db)
1175
1178
1176 return db
1179 return db
1177
1180
1178
1181
1179 def makedb(path):
1182 def makedb(path):
1180 """Construct a database handle for a database at path."""
1183 """Construct a database handle for a database at path."""
1181
1184
1182 db = sqlite3.connect(encoding.strfromlocal(path))
1185 db = sqlite3.connect(encoding.strfromlocal(path))
1183 db.text_factory = bytes
1186 db.text_factory = bytes
1184
1187
1185 res = db.execute('PRAGMA user_version').fetchone()[0]
1188 res = db.execute('PRAGMA user_version').fetchone()[0]
1186
1189
1187 # New database.
1190 # New database.
1188 if res == 0:
1191 if res == 0:
1189 for statement in CREATE_SCHEMA:
1192 for statement in CREATE_SCHEMA:
1190 db.execute(statement)
1193 db.execute(statement)
1191
1194
1192 db.commit()
1195 db.commit()
1193
1196
1194 elif res == CURRENT_SCHEMA_VERSION:
1197 elif res == CURRENT_SCHEMA_VERSION:
1195 pass
1198 pass
1196
1199
1197 else:
1200 else:
1198 raise error.Abort(_(b'sqlite database has unrecognized version'))
1201 raise error.Abort(_(b'sqlite database has unrecognized version'))
1199
1202
1200 db.execute('PRAGMA journal_mode=WAL')
1203 db.execute('PRAGMA journal_mode=WAL')
1201
1204
1202 return db
1205 return db
1203
1206
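# An illustrative standalone example (not the extension's code) of the
# ``PRAGMA user_version`` pattern used by ``makedb`` above: SQLite keeps a
# free 32-bit integer in the database header, so the schema version can be
# read cheaply and bumped after the schema statements run.
import sqlite3

def open_versioned_db(path, schema_statements, version):
    db = sqlite3.connect(path)
    current = db.execute('PRAGMA user_version').fetchone()[0]
    if current == 0:
        for statement in schema_statements:
            db.execute(statement)
        # PRAGMA values cannot be bound as parameters, hence the formatting.
        db.execute('PRAGMA user_version = %d' % version)
        db.commit()
    elif current != version:
        raise RuntimeError('unrecognized schema version: %d' % current)
    return db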
1204
1207
1205 def featuresetup(ui, supported):
1208 def featuresetup(ui, supported):
1206 supported.add(REQUIREMENT)
1209 supported.add(REQUIREMENT)
1207
1210
1208 if zstd:
1211 if zstd:
1209 supported.add(REQUIREMENT_ZSTD)
1212 supported.add(REQUIREMENT_ZSTD)
1210
1213
1211 supported.add(REQUIREMENT_ZLIB)
1214 supported.add(REQUIREMENT_ZLIB)
1212 supported.add(REQUIREMENT_NONE)
1215 supported.add(REQUIREMENT_NONE)
1213 supported.add(REQUIREMENT_SHALLOW_FILES)
1216 supported.add(REQUIREMENT_SHALLOW_FILES)
1214 supported.add(requirements.NARROW_REQUIREMENT)
1217 supported.add(requirements.NARROW_REQUIREMENT)
1215
1218
1216
1219
1217 def newreporequirements(orig, ui, createopts):
1220 def newreporequirements(orig, ui, createopts):
1218 if createopts[b'backend'] != b'sqlite':
1221 if createopts[b'backend'] != b'sqlite':
1219 return orig(ui, createopts)
1222 return orig(ui, createopts)
1220
1223
1221 # This restriction can be lifted once we have more confidence.
1224 # This restriction can be lifted once we have more confidence.
1222 if b'sharedrepo' in createopts:
1225 if b'sharedrepo' in createopts:
1223 raise error.Abort(
1226 raise error.Abort(
1224 _(b'shared repositories not supported with SQLite store')
1227 _(b'shared repositories not supported with SQLite store')
1225 )
1228 )
1226
1229
1227 # This filtering is out of an abundance of caution: we want to ensure
1230 # This filtering is out of an abundance of caution: we want to ensure
1228 # we honor creation options and we do that by enumerating exactly the
1231 # we honor creation options and we do that by enumerating exactly the
1229 # creation options we recognize.
1232 # creation options we recognize.
1230 known = {
1233 known = {
1231 b'narrowfiles',
1234 b'narrowfiles',
1232 b'backend',
1235 b'backend',
1233 b'shallowfilestore',
1236 b'shallowfilestore',
1234 }
1237 }
1235
1238
1236 unsupported = set(createopts) - known
1239 unsupported = set(createopts) - known
1237 if unsupported:
1240 if unsupported:
1238 raise error.Abort(
1241 raise error.Abort(
1239 _(b'SQLite store does not support repo creation option: %s')
1242 _(b'SQLite store does not support repo creation option: %s')
1240 % b', '.join(sorted(unsupported))
1243 % b', '.join(sorted(unsupported))
1241 )
1244 )
1242
1245
1243 # Since we're a hybrid store that still relies on revlogs, we fall back
1246 # Since we're a hybrid store that still relies on revlogs, we fall back
1244 # to using the revlogv1 backend's storage requirements then adding our
1247 # to using the revlogv1 backend's storage requirements then adding our
1245 # own requirement.
1248 # own requirement.
1246 createopts[b'backend'] = b'revlogv1'
1249 createopts[b'backend'] = b'revlogv1'
1247 requirements = orig(ui, createopts)
1250 requirements = orig(ui, createopts)
1248 requirements.add(REQUIREMENT)
1251 requirements.add(REQUIREMENT)
1249
1252
1250 compression = ui.config(b'storage', b'sqlite.compression')
1253 compression = ui.config(b'storage', b'sqlite.compression')
1251
1254
1252 if compression == b'zstd' and not zstd:
1255 if compression == b'zstd' and not zstd:
1253 raise error.Abort(
1256 raise error.Abort(
1254 _(
1257 _(
1255 b'storage.sqlite.compression set to "zstd" but '
1258 b'storage.sqlite.compression set to "zstd" but '
1256 b'zstandard compression not available to this '
1259 b'zstandard compression not available to this '
1257 b'Mercurial install'
1260 b'Mercurial install'
1258 )
1261 )
1259 )
1262 )
1260
1263
1261 if compression == b'zstd':
1264 if compression == b'zstd':
1262 requirements.add(REQUIREMENT_ZSTD)
1265 requirements.add(REQUIREMENT_ZSTD)
1263 elif compression == b'zlib':
1266 elif compression == b'zlib':
1264 requirements.add(REQUIREMENT_ZLIB)
1267 requirements.add(REQUIREMENT_ZLIB)
1265 elif compression == b'none':
1268 elif compression == b'none':
1266 requirements.add(REQUIREMENT_NONE)
1269 requirements.add(REQUIREMENT_NONE)
1267 else:
1270 else:
1268 raise error.Abort(
1271 raise error.Abort(
1269 _(
1272 _(
1270 b'unknown compression engine defined in '
1273 b'unknown compression engine defined in '
1271 b'storage.sqlite.compression: %s'
1274 b'storage.sqlite.compression: %s'
1272 )
1275 )
1273 % compression
1276 % compression
1274 )
1277 )
1275
1278
1276 if createopts.get(b'shallowfilestore'):
1279 if createopts.get(b'shallowfilestore'):
1277 requirements.add(REQUIREMENT_SHALLOW_FILES)
1280 requirements.add(REQUIREMENT_SHALLOW_FILES)
1278
1281
1279 return requirements
1282 return requirements
1280
1283
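# A hedged configuration example connecting the requirements logic above to
# the config it reads: ``storage.sqlite.compression`` is consulted in this
# function, while ``storage.new-repo-backend`` is the standard option that
# routes new repository creation to this backend.
#
#   [storage]
#   new-repo-backend = sqlite
#   sqlite.compression = zstd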
1281
1284
1282 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1285 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1283 class sqlitefilestorage:
1286 class sqlitefilestorage:
1284 """Repository file storage backed by SQLite."""
1287 """Repository file storage backed by SQLite."""
1285
1288
1286 def file(self, path):
1289 def file(self, path):
1287 if path[0] == b'/':
1290 if path[0] == b'/':
1288 path = path[1:]
1291 path = path[1:]
1289
1292
1290 if REQUIREMENT_ZSTD in self.requirements:
1293 if REQUIREMENT_ZSTD in self.requirements:
1291 compression = b'zstd'
1294 compression = b'zstd'
1292 elif REQUIREMENT_ZLIB in self.requirements:
1295 elif REQUIREMENT_ZLIB in self.requirements:
1293 compression = b'zlib'
1296 compression = b'zlib'
1294 elif REQUIREMENT_NONE in self.requirements:
1297 elif REQUIREMENT_NONE in self.requirements:
1295 compression = b'none'
1298 compression = b'none'
1296 else:
1299 else:
1297 raise error.Abort(
1300 raise error.Abort(
1298 _(
1301 _(
1299 b'unable to determine what compression engine '
1302 b'unable to determine what compression engine '
1300 b'to use for SQLite storage'
1303 b'to use for SQLite storage'
1301 )
1304 )
1302 )
1305 )
1303
1306
1304 return sqlitefilestore(self._dbconn, path, compression)
1307 return sqlitefilestore(self._dbconn, path, compression)
1305
1308
1306
1309
1307 def makefilestorage(orig, requirements, features, **kwargs):
1310 def makefilestorage(orig, requirements, features, **kwargs):
1308 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1311 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1309 if REQUIREMENT in requirements:
1312 if REQUIREMENT in requirements:
1310 if REQUIREMENT_SHALLOW_FILES in requirements:
1313 if REQUIREMENT_SHALLOW_FILES in requirements:
1311 features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)
1314 features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)
1312
1315
1313 return sqlitefilestorage
1316 return sqlitefilestorage
1314 else:
1317 else:
1315 return orig(requirements=requirements, features=features, **kwargs)
1318 return orig(requirements=requirements, features=features, **kwargs)
1316
1319
1317
1320
1318 def makemain(orig, ui, requirements, **kwargs):
1321 def makemain(orig, ui, requirements, **kwargs):
1319 if REQUIREMENT in requirements:
1322 if REQUIREMENT in requirements:
1320 if REQUIREMENT_ZSTD in requirements and not zstd:
1323 if REQUIREMENT_ZSTD in requirements and not zstd:
1321 raise error.Abort(
1324 raise error.Abort(
1322 _(
1325 _(
1323 b'repository uses zstandard compression, which '
1326 b'repository uses zstandard compression, which '
1324 b'is not available to this Mercurial install'
1327 b'is not available to this Mercurial install'
1325 )
1328 )
1326 )
1329 )
1327
1330
1328 return sqliterepository
1331 return sqliterepository
1329
1332
1330 return orig(requirements=requirements, **kwargs)
1333 return orig(requirements=requirements, **kwargs)
1331
1334
1332
1335
1333 def verifierinit(orig, self, *args, **kwargs):
1336 def verifierinit(orig, self, *args, **kwargs):
1334 orig(self, *args, **kwargs)
1337 orig(self, *args, **kwargs)
1335
1338
1336 # We don't care that files in the store don't align with what is
1339 # We don't care that files in the store don't align with what is
1337 # advertised. So suppress these warnings.
1340 # advertised. So suppress these warnings.
1338 self.warnorphanstorefiles = False
1341 self.warnorphanstorefiles = False
1339
1342
1340
1343
1341 def extsetup(ui):
1344 def extsetup(ui):
1342 localrepo.featuresetupfuncs.add(featuresetup)
1345 localrepo.featuresetupfuncs.add(featuresetup)
1343 extensions.wrapfunction(
1346 extensions.wrapfunction(
1344 localrepo, 'newreporequirements', newreporequirements
1347 localrepo, 'newreporequirements', newreporequirements
1345 )
1348 )
1346 extensions.wrapfunction(localrepo, 'makefilestorage', makefilestorage)
1349 extensions.wrapfunction(localrepo, 'makefilestorage', makefilestorage)
1347 extensions.wrapfunction(localrepo, 'makemain', makemain)
1350 extensions.wrapfunction(localrepo, 'makemain', makemain)
1348 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
1351 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
1349
1352
1350
1353
1351 def reposetup(ui, repo):
1354 def reposetup(ui, repo):
1352 if isinstance(repo, sqliterepository):
1355 if isinstance(repo, sqliterepository):
1353 repo._db = None
1356 repo._db = None
1354
1357
1355 # TODO check for bundlerepository?
1358 # TODO check for bundlerepository?
@@ -1,320 +1,320
1 # filelog.py - file history class for mercurial
1 # filelog.py - file history class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import annotations
8 from __future__ import annotations
9
9
10 from typing import (
10 from typing import (
11 Iterable,
11 Iterable,
12 Iterator,
12 Iterator,
13 )
13 )
14
14
15 from .i18n import _
15 from .i18n import _
16 from .node import nullrev
16 from .node import nullrev
17 from . import (
17 from . import (
18 error,
18 error,
19 revlog,
19 revlog,
20 )
20 )
21 from .interfaces import (
21 from .interfaces import (
22 repository,
22 repository,
23 )
23 )
24 from .utils import storageutil
24 from .utils import storageutil
25 from .revlogutils import (
25 from .revlogutils import (
26 constants as revlog_constants,
26 constants as revlog_constants,
27 rewrite,
27 rewrite,
28 )
28 )
29
29
30
30
31 class filelog: # (repository.ifilestorage)
31 class filelog: # (repository.ifilestorage)
32 _revlog: revlog.revlog
32 _revlog: revlog.revlog
33 nullid: bytes
33 nullid: bytes
34 _fix_issue6528: bool
34 _fix_issue6528: bool
35
35
36 def __init__(self, opener, path, try_split=False):
36 def __init__(self, opener, path, try_split=False):
37 self._revlog = revlog.revlog(
37 self._revlog = revlog.revlog(
38 opener,
38 opener,
39 # XXX should use the unencoded path
39 # XXX should use the unencoded path
40 target=(revlog_constants.KIND_FILELOG, path),
40 target=(revlog_constants.KIND_FILELOG, path),
41 radix=b'/'.join((b'data', path)),
41 radix=b'/'.join((b'data', path)),
42 censorable=True,
42 censorable=True,
43 canonical_parent_order=False, # see comment in revlog.py
43 canonical_parent_order=False, # see comment in revlog.py
44 try_split=try_split,
44 try_split=try_split,
45 )
45 )
46 # Full name of the user visible file, relative to the repository root.
46 # Full name of the user visible file, relative to the repository root.
47 # Used by LFS.
47 # Used by LFS.
48 self._revlog.filename = path
48 self._revlog.filename = path
49 self.nullid = self._revlog.nullid
49 self.nullid = self._revlog.nullid
50 opts = opener.options
50 opts = opener.options
51 self._fix_issue6528 = opts.get(b'issue6528.fix-incoming', True)
51 self._fix_issue6528 = opts.get(b'issue6528.fix-incoming', True)
52
52
53 def get_revlog(self) -> revlog.revlog:
53 def get_revlog(self) -> revlog.revlog:
54 """return an actual revlog instance if any
54 """return an actual revlog instance if any
55
55
56 This exists because a lot of code leverages the fact that the underlying
56 This exists because a lot of code leverages the fact that the underlying
57 storage is a revlog for optimization, so giving simple access to the
57 storage is a revlog for optimization, so giving simple access to the
58 revlog instance helps such code.
58 revlog instance helps such code.
59 """
59 """
60 return self._revlog
60 return self._revlog
61
61
62 def __len__(self) -> int:
62 def __len__(self) -> int:
63 return len(self._revlog)
63 return len(self._revlog)
64
64
65 def __iter__(self) -> Iterator[int]:
65 def __iter__(self) -> Iterator[int]:
66 return self._revlog.__iter__()
66 return self._revlog.__iter__()
67
67
68 def hasnode(self, node):
68 def hasnode(self, node):
69 if node in (self.nullid, nullrev):
69 if node in (self.nullid, nullrev):
70 return False
70 return False
71
71
72 try:
72 try:
73 self._revlog.rev(node)
73 self._revlog.rev(node)
74 return True
74 return True
75 except (TypeError, ValueError, IndexError, error.LookupError):
75 except (TypeError, ValueError, IndexError, error.LookupError):
76 return False
76 return False
77
77
78 def revs(self, start=0, stop=None):
78 def revs(self, start=0, stop=None):
79 return self._revlog.revs(start=start, stop=stop)
79 return self._revlog.revs(start=start, stop=stop)
80
80
81 def parents(self, node):
81 def parents(self, node):
82 return self._revlog.parents(node)
82 return self._revlog.parents(node)
83
83
84 def parentrevs(self, rev):
84 def parentrevs(self, rev):
85 return self._revlog.parentrevs(rev)
85 return self._revlog.parentrevs(rev)
86
86
87 def rev(self, node):
87 def rev(self, node):
88 return self._revlog.rev(node)
88 return self._revlog.rev(node)
89
89
90 def node(self, rev):
90 def node(self, rev):
91 return self._revlog.node(rev)
91 return self._revlog.node(rev)
92
92
93 def lookup(self, node):
93 def lookup(self, node):
94 return storageutil.fileidlookup(
94 return storageutil.fileidlookup(
95 self._revlog, node, self._revlog.display_id
95 self._revlog, node, self._revlog.display_id
96 )
96 )
97
97
98 def linkrev(self, rev):
98 def linkrev(self, rev):
99 return self._revlog.linkrev(rev)
99 return self._revlog.linkrev(rev)
100
100
101 def commonancestorsheads(self, node1, node2):
101 def commonancestorsheads(self, node1, node2):
102 return self._revlog.commonancestorsheads(node1, node2)
102 return self._revlog.commonancestorsheads(node1, node2)
103
103
104 # Used by dagop.blockdescendants().
104 # Used by dagop.blockdescendants().
105 def descendants(self, revs):
105 def descendants(self, revs):
106 return self._revlog.descendants(revs)
106 return self._revlog.descendants(revs)
107
107
108 def heads(self, start=None, stop=None):
108 def heads(self, start=None, stop=None):
109 return self._revlog.heads(start, stop)
109 return self._revlog.heads(start, stop)
110
110
111 # Used by hgweb, children extension.
111 # Used by hgweb, children extension.
112 def children(self, node):
112 def children(self, node):
113 return self._revlog.children(node)
113 return self._revlog.children(node)
114
114
115 def iscensored(self, rev):
115 def iscensored(self, rev):
116 return self._revlog.iscensored(rev)
116 return self._revlog.iscensored(rev)
117
117
118 def revision(self, node):
118 def revision(self, node):
119 return self._revlog.revision(node)
119 return self._revlog.revision(node)
120
120
121 def rawdata(self, node):
121 def rawdata(self, node):
122 return self._revlog.rawdata(node)
122 return self._revlog.rawdata(node)
123
123
124 def emitrevisions(
124 def emitrevisions(
125 self,
125 self,
126 nodes,
126 nodes,
127 nodesorder=None,
127 nodesorder=None,
128 revisiondata=False,
128 revisiondata=False,
129 assumehaveparentrevisions=False,
129 assumehaveparentrevisions=False,
130 deltamode=repository.CG_DELTAMODE_STD,
130 deltamode=repository.CG_DELTAMODE_STD,
131 sidedata_helpers=None,
131 sidedata_helpers=None,
132 debug_info=None,
132 debug_info=None,
133 ):
133 ):
134 return self._revlog.emitrevisions(
134 return self._revlog.emitrevisions(
135 nodes,
135 nodes,
136 nodesorder=nodesorder,
136 nodesorder=nodesorder,
137 revisiondata=revisiondata,
137 revisiondata=revisiondata,
138 assumehaveparentrevisions=assumehaveparentrevisions,
138 assumehaveparentrevisions=assumehaveparentrevisions,
139 deltamode=deltamode,
139 deltamode=deltamode,
140 sidedata_helpers=sidedata_helpers,
140 sidedata_helpers=sidedata_helpers,
141 debug_info=debug_info,
141 debug_info=debug_info,
142 )
142 )
143
143
144 def addrevision(
144 def addrevision(
145 self,
145 self,
146 revisiondata,
146 revisiondata,
147 transaction,
147 transaction,
148 linkrev,
148 linkrev,
149 p1,
149 p1,
150 p2,
150 p2,
151 node=None,
151 node=None,
152 flags=revlog.REVIDX_DEFAULT_FLAGS,
152 flags=revlog.REVIDX_DEFAULT_FLAGS,
153 cachedelta=None,
153 cachedelta=None,
154 ):
154 ):
155 return self._revlog.addrevision(
155 return self._revlog.addrevision(
156 revisiondata,
156 revisiondata,
157 transaction,
157 transaction,
158 linkrev,
158 linkrev,
159 p1,
159 p1,
160 p2,
160 p2,
161 node=node,
161 node=node,
162 flags=flags,
162 flags=flags,
163 cachedelta=cachedelta,
163 cachedelta=cachedelta,
164 )
164 )
165
165
166 def addgroup(
166 def addgroup(
167 self,
167 self,
168 deltas,
168 deltas,
169 linkmapper,
169 linkmapper,
170 transaction,
170 transaction,
171 addrevisioncb=None,
171 addrevisioncb=None,
172 duplicaterevisioncb=None,
172 duplicaterevisioncb=None,
173 maybemissingparents=False,
173 maybemissingparents=False,
174 debug_info=None,
174 debug_info=None,
175 delta_base_reuse_policy=None,
175 delta_base_reuse_policy=None,
176 ):
176 ):
177 if maybemissingparents:
177 if maybemissingparents:
178 raise error.Abort(
178 raise error.Abort(
179 _(
179 _(
180 b'revlog storage does not support missing '
180 b'revlog storage does not support missing '
181 b'parents write mode'
181 b'parents write mode'
182 )
182 )
183 )
183 )
184
184
185 with self._revlog._writing(transaction):
185 with self._revlog._writing(transaction):
186 if self._fix_issue6528:
186 if self._fix_issue6528:
187 deltas = rewrite.filter_delta_issue6528(self._revlog, deltas)
187 deltas = rewrite.filter_delta_issue6528(self._revlog, deltas)
188
188
189 return self._revlog.addgroup(
189 return self._revlog.addgroup(
190 deltas,
190 deltas,
191 linkmapper,
191 linkmapper,
192 transaction,
192 transaction,
193 addrevisioncb=addrevisioncb,
193 addrevisioncb=addrevisioncb,
194 duplicaterevisioncb=duplicaterevisioncb,
194 duplicaterevisioncb=duplicaterevisioncb,
195 debug_info=debug_info,
195 debug_info=debug_info,
196 delta_base_reuse_policy=delta_base_reuse_policy,
196 delta_base_reuse_policy=delta_base_reuse_policy,
197 )
197 )
198
198
199 def getstrippoint(self, minlink):
199 def getstrippoint(self, minlink):
200 return self._revlog.getstrippoint(minlink)
200 return self._revlog.getstrippoint(minlink)
201
201
202 def strip(self, minlink, transaction):
202 def strip(self, minlink, transaction):
203 return self._revlog.strip(minlink, transaction)
203 return self._revlog.strip(minlink, transaction)
204
204
205 def censorrevision(self, tr, node, tombstone=b''):
205 def censorrevision(self, tr, node, tombstone=b''):
206 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
206 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
207
207
208 def files(self):
208 def files(self):
209 return self._revlog.files()
209 return self._revlog.files()
210
210
211 def read(self, node):
211 def read(self, node):
212 return storageutil.filtermetadata(self.revision(node))
212 return storageutil.filtermetadata(self.revision(node))
213
213
214 def add(self, text, meta, transaction, link, p1=None, p2=None):
214 def add(self, text, meta, transaction, link, p1=None, p2=None):
215 if meta or text.startswith(b'\1\n'):
215 if meta or text.startswith(b'\1\n'):
216 text = storageutil.packmeta(meta, text)
216 text = storageutil.packmeta(meta, text)
217 rev = self.addrevision(text, transaction, link, p1, p2)
217 rev = self.addrevision(text, transaction, link, p1, p2)
218 return self.node(rev)
218 return self.node(rev)
219
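# A hedged sketch of how ``add`` is typically fed copy metadata: the
# ``copy``/``copyrev`` keys are the standard in-band filelog metadata, and
# ``packmeta`` wraps them between \1\n markers ahead of the file text
# (``copyrev`` is the hex node of the copy source's file revision).
#
#   meta = {b'copy': b'old/path', b'copyrev': hexsourcenode}
#   fnode = fl.add(filedata, meta, tr, linkrev, p1node, p2node)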
219
220 def renamed(self, node):
220 def renamed(self, node):
221 return storageutil.filerevisioncopied(self, node)
221 return storageutil.filerevisioncopied(self, node)
222
222
223 def size(self, rev):
223 def size(self, rev):
224 """return the size of a given revision"""
224 """return the size of a given revision"""
225
225
226 # for revisions with renames, we have to go the slow way
226 # for revisions with renames, we have to go the slow way
227 node = self.node(rev)
227 node = self.node(rev)
228 if self.iscensored(rev):
228 if self.iscensored(rev):
229 return 0
229 return 0
230 if self.renamed(node):
230 if self.renamed(node):
231 return len(self.read(node))
231 return len(self.read(node))
232
232
233 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
233 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
234 # XXX See also basefilectx.cmp.
234 # XXX See also basefilectx.cmp.
235 return self._revlog.size(rev)
235 return self._revlog.size(rev)
236
236
237 def cmp(self, node, text):
237 def cmp(self, node, text):
238 """compare text with a given file revision
238 """compare text with a given file revision
239
239
240 returns True if text is different than what is stored.
240 returns True if text is different than what is stored.
241 """
241 """
242 return not storageutil.filedataequivalent(self, node, text)
242 return not storageutil.filedataequivalent(self, node, text)
243
243
244 def verifyintegrity(self, state) -> Iterable[revlog.revlogproblem]:
244 def verifyintegrity(self, state) -> Iterable[repository.iverifyproblem]:
245 return self._revlog.verifyintegrity(state)
245 return self._revlog.verifyintegrity(state)
246
246
247 def storageinfo(
247 def storageinfo(
248 self,
248 self,
249 exclusivefiles=False,
249 exclusivefiles=False,
250 sharedfiles=False,
250 sharedfiles=False,
251 revisionscount=False,
251 revisionscount=False,
252 trackedsize=False,
252 trackedsize=False,
253 storedsize=False,
253 storedsize=False,
254 ):
254 ):
255 return self._revlog.storageinfo(
255 return self._revlog.storageinfo(
256 exclusivefiles=exclusivefiles,
256 exclusivefiles=exclusivefiles,
257 sharedfiles=sharedfiles,
257 sharedfiles=sharedfiles,
258 revisionscount=revisionscount,
258 revisionscount=revisionscount,
259 trackedsize=trackedsize,
259 trackedsize=trackedsize,
260 storedsize=storedsize,
260 storedsize=storedsize,
261 )
261 )
262
262
263 # Used by repo upgrade.
263 # Used by repo upgrade.
264 def clone(self, tr, destrevlog, **kwargs):
264 def clone(self, tr, destrevlog, **kwargs):
265 if not isinstance(destrevlog, filelog):
265 if not isinstance(destrevlog, filelog):
266 msg = b'expected filelog to clone(), not %r'
266 msg = b'expected filelog to clone(), not %r'
267 msg %= destrevlog
267 msg %= destrevlog
268 raise error.ProgrammingError(msg)
268 raise error.ProgrammingError(msg)
269
269
270 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
270 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
271
271
272
272
273 class narrowfilelog(filelog):
273 class narrowfilelog(filelog):
274 """Filelog variation to be used with narrow stores."""
274 """Filelog variation to be used with narrow stores."""
275
275
276 def __init__(self, opener, path, narrowmatch, try_split=False):
276 def __init__(self, opener, path, narrowmatch, try_split=False):
277 super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
277 super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
278 self._narrowmatch = narrowmatch
278 self._narrowmatch = narrowmatch
279
279
280 def renamed(self, node):
280 def renamed(self, node):
281 res = super(narrowfilelog, self).renamed(node)
281 res = super(narrowfilelog, self).renamed(node)
282
282
283 # Renames that come from outside the narrowspec are problematic
283 # Renames that come from outside the narrowspec are problematic
284 # because we may lack the base text for the rename. This can result
284 # because we may lack the base text for the rename. This can result
285 # in code attempting to walk the ancestry or compute a diff
285 # in code attempting to walk the ancestry or compute a diff
286 # encountering a missing revision. We address this by silently
286 # encountering a missing revision. We address this by silently
287 # removing rename metadata if the source file is outside the
287 # removing rename metadata if the source file is outside the
288 # narrow spec.
288 # narrow spec.
289 #
289 #
290 # A better solution would be to see if the base revision is available,
290 # A better solution would be to see if the base revision is available,
291 # rather than assuming it isn't.
291 # rather than assuming it isn't.
292 #
292 #
293 # An even better solution would be to teach all consumers of rename
293 # An even better solution would be to teach all consumers of rename
294 # metadata that the base revision may not be available.
294 # metadata that the base revision may not be available.
295 #
295 #
296 # TODO consider better ways of doing this.
296 # TODO consider better ways of doing this.
297 if res and not self._narrowmatch(res[0]):
297 if res and not self._narrowmatch(res[0]):
298 return None
298 return None
299
299
300 return res
300 return res
301
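# An illustrative example of the behaviour described above, assuming a
# narrowspec that matches only ``dir1/``: a revision of ``dir1/new`` whose
# copy metadata points at ``dir2/old`` is reported as not copied, because
# the recorded source falls outside the narrow match.
#
#   fl = narrowfilelog(opener, b'dir1/new', narrowmatch)
#   fl.renamed(node)   # -> None instead of (b'dir2/old', copyrev)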
301
302 def size(self, rev):
302 def size(self, rev):
303 # Because we have a custom renamed() that may lie, we need to call
303 # Because we have a custom renamed() that may lie, we need to call
304 # the base renamed() to report accurate results.
304 # the base renamed() to report accurate results.
305 node = self.node(rev)
305 node = self.node(rev)
306 if super(narrowfilelog, self).renamed(node):
306 if super(narrowfilelog, self).renamed(node):
307 return len(self.read(node))
307 return len(self.read(node))
308 else:
308 else:
309 return super(narrowfilelog, self).size(rev)
309 return super(narrowfilelog, self).size(rev)
310
310
311 def cmp(self, node, text):
311 def cmp(self, node, text):
312 # We don't call `super` because narrow parents can be buggy in case of an
312 # We don't call `super` because narrow parents can be buggy in case of an
313 # ambiguous dirstate. Always take the slow path until there is a better
313 # ambiguous dirstate. Always take the slow path until there is a better
314 # fix, see issue6150.
314 # fix, see issue6150.
315
315
316 # Censored files compare against the empty file.
316 # Censored files compare against the empty file.
317 if self.iscensored(self.rev(node)):
317 if self.iscensored(self.rev(node)):
318 return text != b''
318 return text != b''
319
319
320 return self.read(node) != text
320 return self.read(node) != text
@@ -1,4244 +1,4244
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15 from __future__ import annotations
15 from __future__ import annotations
16
16
17 import binascii
17 import binascii
18 import collections
18 import collections
19 import contextlib
19 import contextlib
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import typing
23 import typing
24 import weakref
24 import weakref
25 import zlib
25 import zlib
26
26
27 from typing import (
27 from typing import (
28 Iterable,
28 Iterable,
29 Iterator,
29 Iterator,
30 Optional,
30 Optional,
31 Tuple,
31 Tuple,
32 )
32 )
33
33
34 # import stuff from node for others to import from revlog
34 # import stuff from node for others to import from revlog
35 from .node import (
35 from .node import (
36 bin,
36 bin,
37 hex,
37 hex,
38 nullrev,
38 nullrev,
39 sha1nodeconstants,
39 sha1nodeconstants,
40 short,
40 short,
41 wdirrev,
41 wdirrev,
42 )
42 )
43 from .i18n import _
43 from .i18n import _
44 from .revlogutils.constants import (
44 from .revlogutils.constants import (
45 ALL_KINDS,
45 ALL_KINDS,
46 CHANGELOGV2,
46 CHANGELOGV2,
47 COMP_MODE_DEFAULT,
47 COMP_MODE_DEFAULT,
48 COMP_MODE_INLINE,
48 COMP_MODE_INLINE,
49 COMP_MODE_PLAIN,
49 COMP_MODE_PLAIN,
50 DELTA_BASE_REUSE_NO,
50 DELTA_BASE_REUSE_NO,
51 DELTA_BASE_REUSE_TRY,
51 DELTA_BASE_REUSE_TRY,
52 ENTRY_RANK,
52 ENTRY_RANK,
53 FEATURES_BY_VERSION,
53 FEATURES_BY_VERSION,
54 FLAG_GENERALDELTA,
54 FLAG_GENERALDELTA,
55 FLAG_INLINE_DATA,
55 FLAG_INLINE_DATA,
56 INDEX_HEADER,
56 INDEX_HEADER,
57 KIND_CHANGELOG,
57 KIND_CHANGELOG,
58 KIND_FILELOG,
58 KIND_FILELOG,
59 RANK_UNKNOWN,
59 RANK_UNKNOWN,
60 REVLOGV0,
60 REVLOGV0,
61 REVLOGV1,
61 REVLOGV1,
62 REVLOGV1_FLAGS,
62 REVLOGV1_FLAGS,
63 REVLOGV2,
63 REVLOGV2,
64 REVLOGV2_FLAGS,
64 REVLOGV2_FLAGS,
65 REVLOG_DEFAULT_FLAGS,
65 REVLOG_DEFAULT_FLAGS,
66 REVLOG_DEFAULT_FORMAT,
66 REVLOG_DEFAULT_FORMAT,
67 REVLOG_DEFAULT_VERSION,
67 REVLOG_DEFAULT_VERSION,
68 SUPPORTED_FLAGS,
68 SUPPORTED_FLAGS,
69 )
69 )
70 from .revlogutils.flagutil import (
70 from .revlogutils.flagutil import (
71 REVIDX_DEFAULT_FLAGS,
71 REVIDX_DEFAULT_FLAGS,
72 REVIDX_ELLIPSIS,
72 REVIDX_ELLIPSIS,
73 REVIDX_EXTSTORED,
73 REVIDX_EXTSTORED,
74 REVIDX_FLAGS_ORDER,
74 REVIDX_FLAGS_ORDER,
75 REVIDX_HASCOPIESINFO,
75 REVIDX_HASCOPIESINFO,
76 REVIDX_ISCENSORED,
76 REVIDX_ISCENSORED,
77 REVIDX_RAWTEXT_CHANGING_FLAGS,
77 REVIDX_RAWTEXT_CHANGING_FLAGS,
78 )
78 )
79 from .thirdparty import attr
79 from .thirdparty import attr
80
80
81 # Force pytype to use the non-vendored package
81 # Force pytype to use the non-vendored package
82 if typing.TYPE_CHECKING:
82 if typing.TYPE_CHECKING:
83 # noinspection PyPackageRequirements
83 # noinspection PyPackageRequirements
84 import attr
84 import attr
85 from .pure.parsers import BaseIndexObject
85 from .pure.parsers import BaseIndexObject
86
86
87 from . import (
87 from . import (
88 ancestor,
88 ancestor,
89 dagop,
89 dagop,
90 error,
90 error,
91 mdiff,
91 mdiff,
92 policy,
92 policy,
93 pycompat,
93 pycompat,
94 revlogutils,
94 revlogutils,
95 templatefilters,
95 templatefilters,
96 util,
96 util,
97 vfs as vfsmod,
97 vfs as vfsmod,
98 )
98 )
99 from .interfaces import (
99 from .interfaces import (
100 repository,
100 repository,
101 )
101 )
102 from .revlogutils import (
102 from .revlogutils import (
103 deltas as deltautil,
103 deltas as deltautil,
104 docket as docketutil,
104 docket as docketutil,
105 flagutil,
105 flagutil,
106 nodemap as nodemaputil,
106 nodemap as nodemaputil,
107 randomaccessfile,
107 randomaccessfile,
108 revlogv0,
108 revlogv0,
109 rewrite,
109 rewrite,
110 sidedata as sidedatautil,
110 sidedata as sidedatautil,
111 )
111 )
112 from .utils import (
112 from .utils import (
113 storageutil,
113 storageutil,
114 stringutil,
114 stringutil,
115 )
115 )
116
116
117 # blanked usage of all the name to prevent pyflakes constraints
117 # blanked usage of all the names to prevent pyflakes complaints
117 # blanked usage of all the names to prevent pyflakes complaints
118 # We need these names available in the module for extensions.
118 # We need these names available in the module for extensions.
119
120 REVLOGV0
120 REVLOGV0
121 REVLOGV1
121 REVLOGV1
122 REVLOGV2
122 REVLOGV2
123 CHANGELOGV2
123 CHANGELOGV2
124 FLAG_INLINE_DATA
124 FLAG_INLINE_DATA
125 FLAG_GENERALDELTA
125 FLAG_GENERALDELTA
126 REVLOG_DEFAULT_FLAGS
126 REVLOG_DEFAULT_FLAGS
127 REVLOG_DEFAULT_FORMAT
127 REVLOG_DEFAULT_FORMAT
128 REVLOG_DEFAULT_VERSION
128 REVLOG_DEFAULT_VERSION
129 REVLOGV1_FLAGS
129 REVLOGV1_FLAGS
130 REVLOGV2_FLAGS
130 REVLOGV2_FLAGS
131 REVIDX_ISCENSORED
131 REVIDX_ISCENSORED
132 REVIDX_ELLIPSIS
132 REVIDX_ELLIPSIS
133 REVIDX_HASCOPIESINFO
133 REVIDX_HASCOPIESINFO
134 REVIDX_EXTSTORED
134 REVIDX_EXTSTORED
135 REVIDX_DEFAULT_FLAGS
135 REVIDX_DEFAULT_FLAGS
136 REVIDX_FLAGS_ORDER
136 REVIDX_FLAGS_ORDER
137 REVIDX_RAWTEXT_CHANGING_FLAGS
137 REVIDX_RAWTEXT_CHANGING_FLAGS
138
138
139 parsers = policy.importmod('parsers')
139 parsers = policy.importmod('parsers')
140 rustancestor = policy.importrust('ancestor')
140 rustancestor = policy.importrust('ancestor')
141 rustdagop = policy.importrust('dagop', pyo3=True)
141 rustdagop = policy.importrust('dagop', pyo3=True)
142 rustrevlog = policy.importrust('revlog')
142 rustrevlog = policy.importrust('revlog')
143
143
144 # Aliased for performance.
144 # Aliased for performance.
145 _zlibdecompress = zlib.decompress
145 _zlibdecompress = zlib.decompress
146
146
147 # max size of inline data embedded into a revlog
147 # max size of inline data embedded into a revlog
148 _maxinline = 131072
148 _maxinline = 131072
149
149
150
150
151 # Flag processors for REVIDX_ELLIPSIS.
151 # Flag processors for REVIDX_ELLIPSIS.
152 def ellipsisreadprocessor(rl, text):
152 def ellipsisreadprocessor(rl, text):
153 return text, False
153 return text, False
154
154
155
155
156 def ellipsiswriteprocessor(rl, text):
156 def ellipsiswriteprocessor(rl, text):
157 return text, False
157 return text, False
158
158
159
159
160 def ellipsisrawprocessor(rl, text):
160 def ellipsisrawprocessor(rl, text):
161 return False
161 return False
162
162
163
163
164 ellipsisprocessor = (
164 ellipsisprocessor = (
165 ellipsisreadprocessor,
165 ellipsisreadprocessor,
166 ellipsiswriteprocessor,
166 ellipsiswriteprocessor,
167 ellipsisrawprocessor,
167 ellipsisrawprocessor,
168 )
168 )
169
169
170
170
171 def _verify_revision(rl, skipflags, state, node):
171 def _verify_revision(rl, skipflags, state, node):
172 """Verify the integrity of the given revlog ``node`` while providing a hook
172 """Verify the integrity of the given revlog ``node`` while providing a hook
173 point for extensions to influence the operation."""
173 point for extensions to influence the operation."""
174 if skipflags:
174 if skipflags:
175 state[b'skipread'].add(node)
175 state[b'skipread'].add(node)
176 else:
176 else:
177 # Side-effect: read content and verify hash.
177 # Side-effect: read content and verify hash.
178 rl.revision(node)
178 rl.revision(node)
179
179
180
180
181 # True if a fast implementation for persistent-nodemap is available
181 # True if a fast implementation for persistent-nodemap is available
182 #
182 #
183 # We also consider we have a "fast" implementation in "pure" python because
183 # We also consider we have a "fast" implementation in "pure" python because
184 # people using pure don't really have performance considerations (and a
184 # people using pure don't really have performance considerations (and a
185 # wheelbarrow of other slowness sources)
185 # wheelbarrow of other slowness sources)
186 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
186 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
187 parsers, 'BaseIndexObject'
187 parsers, 'BaseIndexObject'
188 )
188 )
189
189
190
190
191 @attr.s(slots=True)
191 @attr.s(slots=True)
192 class revlogrevisiondelta: # (repository.irevisiondelta)
192 class revlogrevisiondelta: # (repository.irevisiondelta)
193 node = attr.ib()
193 node = attr.ib()
194 p1node = attr.ib()
194 p1node = attr.ib()
195 p2node = attr.ib()
195 p2node = attr.ib()
196 basenode = attr.ib()
196 basenode = attr.ib()
197 flags = attr.ib()
197 flags = attr.ib()
198 baserevisionsize = attr.ib()
198 baserevisionsize = attr.ib()
199 revision = attr.ib()
199 revision = attr.ib()
200 delta = attr.ib()
200 delta = attr.ib()
201 sidedata = attr.ib()
201 sidedata = attr.ib()
202 protocol_flags = attr.ib()
202 protocol_flags = attr.ib()
203 linknode = attr.ib(default=None)
203 linknode = attr.ib(default=None)
204
204
205
205
206 @attr.s(frozen=True)
206 @attr.s(frozen=True)
207 class revlogproblem: # (repository.iverifyproblem)
207 class revlogproblem(repository.iverifyproblem):
208 warning = attr.ib(default=None, type=Optional[bytes])
208 warning = attr.ib(default=None, type=Optional[bytes])
209 error = attr.ib(default=None, type=Optional[bytes])
209 error = attr.ib(default=None, type=Optional[bytes])
210 node = attr.ib(default=None, type=Optional[bytes])
210 node = attr.ib(default=None, type=Optional[bytes])
211
211
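# A hedged usage sketch: verification code yields these records and leaves
# reporting to the caller; typically only one of ``warning``/``error`` is
# set, together with the offending ``node``. For example (names illustrative,
# mirroring the pattern used by the sqlitestore hunk above):
#
#   yield revlogproblem(
#       error=_(b'unpacking %s: %s') % (short(node), e),
#       node=node,
#   )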
212
212
213 def parse_index_v1(data, inline):
213 def parse_index_v1(data, inline):
214 # call the C implementation to parse the index data
214 # call the C implementation to parse the index data
215 index, cache = parsers.parse_index2(data, inline)
215 index, cache = parsers.parse_index2(data, inline)
216 return index, cache
216 return index, cache
217
217
218
218
219 def parse_index_v2(data, inline):
219 def parse_index_v2(data, inline):
220 # call the C implementation to parse the index data
220 # call the C implementation to parse the index data
221 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
221 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
222 return index, cache
222 return index, cache
223
223
224
224
225 def parse_index_cl_v2(data, inline):
225 def parse_index_cl_v2(data, inline):
226 # call the C implementation to parse the index data
226 # call the C implementation to parse the index data
227 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
227 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
228 return index, cache
228 return index, cache
229
229
230
230
231 if hasattr(parsers, 'parse_index_devel_nodemap'):
231 if hasattr(parsers, 'parse_index_devel_nodemap'):
232
232
233 def parse_index_v1_nodemap(data, inline):
233 def parse_index_v1_nodemap(data, inline):
234 index, cache = parsers.parse_index_devel_nodemap(data, inline)
234 index, cache = parsers.parse_index_devel_nodemap(data, inline)
235 return index, cache
235 return index, cache
236
236
237 else:
237 else:
238 parse_index_v1_nodemap = None
238 parse_index_v1_nodemap = None
239
239
240
240
241 def parse_index_v1_rust(data, inline, default_header):
241 def parse_index_v1_rust(data, inline, default_header):
242 cache = (0, data) if inline else None
242 cache = (0, data) if inline else None
243 return rustrevlog.Index(data, default_header), cache
243 return rustrevlog.Index(data, default_header), cache
244
244
245
245
246 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
247 # signed integer)
247 # signed integer)
248 _maxentrysize = 0x7FFFFFFF
248 _maxentrysize = 0x7FFFFFFF
249
249
250 FILE_TOO_SHORT_MSG = _(
250 FILE_TOO_SHORT_MSG = _(
251 b'cannot read from revlog %s;'
251 b'cannot read from revlog %s;'
252 b' expected %d bytes from offset %d, data size is %d'
252 b' expected %d bytes from offset %d, data size is %d'
253 )
253 )
254
254
255 hexdigits = b'0123456789abcdefABCDEF'
255 hexdigits = b'0123456789abcdefABCDEF'
256
256
257
257
258 class _Config:
258 class _Config:
259 def copy(self):
259 def copy(self):
260 return self.__class__(**self.__dict__)
260 return self.__class__(**self.__dict__)
261
261
262
262
263 @attr.s()
263 @attr.s()
264 class FeatureConfig(_Config):
264 class FeatureConfig(_Config):
265 """Hold configuration values about the available revlog features"""
265 """Hold configuration values about the available revlog features"""
266
266
267 # the default compression engine
267 # the default compression engine
268 compression_engine = attr.ib(default=b'zlib')
268 compression_engine = attr.ib(default=b'zlib')
269 # compression engines options
269 # compression engines options
270 compression_engine_options = attr.ib(default=attr.Factory(dict))
270 compression_engine_options = attr.ib(default=attr.Factory(dict))
271
271
272 # can we use censor on this revlog
272 # can we use censor on this revlog
273 censorable = attr.ib(default=False)
273 censorable = attr.ib(default=False)
274 # does this revlog use the "side data" feature
274 # does this revlog use the "side data" feature
275 has_side_data = attr.ib(default=False)
275 has_side_data = attr.ib(default=False)
276 # might remove rank configuration once the computation has no impact
276 # might remove rank configuration once the computation has no impact
277 compute_rank = attr.ib(default=False)
277 compute_rank = attr.ib(default=False)
278 # parent order is supposed to be semantically irrelevant, so we
278 # parent order is supposed to be semantically irrelevant, so we
279 # normally resort parents to ensure that the first parent is non-null,
279 # normally resort parents to ensure that the first parent is non-null,
280 # if there is a non-null parent at all.
280 # if there is a non-null parent at all.
281 # filelog abuses the parent order as a flag to mark some instances of
281 # filelog abuses the parent order as a flag to mark some instances of
282 # meta-encoded files, so allow it to disable this behavior.
282 # meta-encoded files, so allow it to disable this behavior.
283 canonical_parent_order = attr.ib(default=False)
283 canonical_parent_order = attr.ib(default=False)
284 # can ellipsis commit be used
284 # can ellipsis commit be used
285 enable_ellipsis = attr.ib(default=False)
285 enable_ellipsis = attr.ib(default=False)
286
286
287 def copy(self):
287 def copy(self):
288 new = super().copy()
288 new = super().copy()
289 new.compression_engine_options = self.compression_engine_options.copy()
289 new.compression_engine_options = self.compression_engine_options.copy()
290 return new
290 return new
291
291
292
292
293 @attr.s()
293 @attr.s()
294 class DataConfig(_Config):
294 class DataConfig(_Config):
295 """Hold configuration value about how the revlog data are read"""
295 """Hold configuration value about how the revlog data are read"""
296
296
297 # should we try to open the "pending" version of the revlog
297 # should we try to open the "pending" version of the revlog
298 try_pending = attr.ib(default=False)
298 try_pending = attr.ib(default=False)
299 # should we try to open the "split" version of the revlog
299 # should we try to open the "split" version of the revlog
300 try_split = attr.ib(default=False)
300 try_split = attr.ib(default=False)
301 # When True, indexfile should be opened with checkambig=True at writing,
301 # When True, indexfile should be opened with checkambig=True at writing,
302 # to avoid file stat ambiguity.
302 # to avoid file stat ambiguity.
303 check_ambig = attr.ib(default=False)
303 check_ambig = attr.ib(default=False)
304
304
305 # If true, use mmap instead of reading to deal with large index
305 # If true, use mmap instead of reading to deal with large index
306 mmap_large_index = attr.ib(default=False)
306 mmap_large_index = attr.ib(default=False)
307 # how much data is large
307 # how much data is large
308 mmap_index_threshold = attr.ib(default=None)
308 mmap_index_threshold = attr.ib(default=None)
309 # How much data to read and cache into the raw revlog data cache.
309 # How much data to read and cache into the raw revlog data cache.
310 chunk_cache_size = attr.ib(default=65536)
310 chunk_cache_size = attr.ib(default=65536)
311
311
312 # The size of the uncompressed cache compared to the largest revision seen.
312 # The size of the uncompressed cache compared to the largest revision seen.
313 uncompressed_cache_factor = attr.ib(default=None)
313 uncompressed_cache_factor = attr.ib(default=None)
314
314
315 # The number of chunk cached
315 # The number of chunk cached
316 uncompressed_cache_count = attr.ib(default=None)
316 uncompressed_cache_count = attr.ib(default=None)
317
317
318 # Allow sparse reading of the revlog data
318 # Allow sparse reading of the revlog data
319 with_sparse_read = attr.ib(default=False)
319 with_sparse_read = attr.ib(default=False)
320 # minimal density of a sparse read chunk
320 # minimal density of a sparse read chunk
321 sr_density_threshold = attr.ib(default=0.50)
321 sr_density_threshold = attr.ib(default=0.50)
322 # minimal size of data we skip when performing sparse read
322 # minimal size of data we skip when performing sparse read
323 sr_min_gap_size = attr.ib(default=262144)
323 sr_min_gap_size = attr.ib(default=262144)
324
324
325 # are delta encoded against arbitrary bases.
325 # are delta encoded against arbitrary bases.
326 generaldelta = attr.ib(default=False)
326 generaldelta = attr.ib(default=False)
327
327
328
328
329 @attr.s()
329 @attr.s()
330 class DeltaConfig(_Config):
330 class DeltaConfig(_Config):
331 """Hold configuration value about how new delta are computed
331 """Hold configuration value about how new delta are computed
332
332
333 Some attributes are duplicated from DataConfig to help keep each object
333 Some attributes are duplicated from DataConfig to help keep each object
334 self-contained.
334 self-contained.
335 """
335 """
336
336
337 # can delta be encoded against arbitrary bases.
337 # can delta be encoded against arbitrary bases.
338 general_delta = attr.ib(default=False)
338 general_delta = attr.ib(default=False)
339 # Allow sparse writing of the revlog data
339 # Allow sparse writing of the revlog data
340 sparse_revlog = attr.ib(default=False)
340 sparse_revlog = attr.ib(default=False)
341 # maximum length of a delta chain
341 # maximum length of a delta chain
342 max_chain_len = attr.ib(default=None)
342 max_chain_len = attr.ib(default=None)
343 # Maximum distance between delta chain base start and end
343 # Maximum distance between delta chain base start and end
344 max_deltachain_span = attr.ib(default=-1)
344 max_deltachain_span = attr.ib(default=-1)
345 # If `upper_bound_comp` is not None, this is the expected maximal gain from
345 # If `upper_bound_comp` is not None, this is the expected maximal gain from
346 # compression for the data content.
346 # compression for the data content.
347 upper_bound_comp = attr.ib(default=None)
347 upper_bound_comp = attr.ib(default=None)
348 # Should we try a delta against both parent
348 # Should we try a delta against both parent
349 delta_both_parents = attr.ib(default=True)
349 delta_both_parents = attr.ib(default=True)
350 # Test delta base candidate group by chunk of this maximal size.
350 # Test delta base candidate group by chunk of this maximal size.
351 candidate_group_chunk_size = attr.ib(default=0)
351 candidate_group_chunk_size = attr.ib(default=0)
352 # Should we display debug information about delta computation
352 # Should we display debug information about delta computation
353 debug_delta = attr.ib(default=False)
353 debug_delta = attr.ib(default=False)
354 # trust incoming delta by default
354 # trust incoming delta by default
355 lazy_delta = attr.ib(default=True)
355 lazy_delta = attr.ib(default=True)
356 # trust the base of incoming delta by default
356 # trust the base of incoming delta by default
357 lazy_delta_base = attr.ib(default=False)
357 lazy_delta_base = attr.ib(default=False)
358
358
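# A hedged illustration (not used by the module itself): the three config
# classes are plain attrs containers, so callers can build and copy them
# directly; every attribute shown has a default, e.g.
#
#   delta_cfg = DeltaConfig(
#       general_delta=True,
#       sparse_revlog=True,
#       max_chain_len=1000,
#   )
#   tweaked = delta_cfg.copy()
#   tweaked.debug_delta = True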
359
359
360 class _InnerRevlog:
360 class _InnerRevlog:
361 """An inner layer of the revlog object
361 """An inner layer of the revlog object
362
362
363 That layer exists to be able to delegate some operations to Rust; its
363 That layer exists to be able to delegate some operations to Rust; its
364 boundaries are arbitrary and based on what we can delegate to Rust.
364 boundaries are arbitrary and based on what we can delegate to Rust.
365 """
365 """
366
366
367 opener: vfsmod.vfs
367 opener: vfsmod.vfs
368
368
369 def __init__(
369 def __init__(
370 self,
370 self,
371 opener: vfsmod.vfs,
371 opener: vfsmod.vfs,
372 index,
372 index,
373 index_file,
373 index_file,
374 data_file,
374 data_file,
375 sidedata_file,
375 sidedata_file,
376 inline,
376 inline,
377 data_config,
377 data_config,
378 delta_config,
378 delta_config,
379 feature_config,
379 feature_config,
380 chunk_cache,
380 chunk_cache,
381 default_compression_header,
381 default_compression_header,
382 ):
382 ):
383 self.opener = opener
383 self.opener = opener
384 self.index: BaseIndexObject = index
384 self.index: BaseIndexObject = index
385
385
386 self.index_file = index_file
386 self.index_file = index_file
387 self.data_file = data_file
387 self.data_file = data_file
388 self.sidedata_file = sidedata_file
388 self.sidedata_file = sidedata_file
389 self.inline = inline
389 self.inline = inline
390 self.data_config = data_config
390 self.data_config = data_config
391 self.delta_config = delta_config
391 self.delta_config = delta_config
392 self.feature_config = feature_config
392 self.feature_config = feature_config
393
393
394 # used during diverted write.
394 # used during diverted write.
395 self._orig_index_file = None
395 self._orig_index_file = None
396
396
397 self._default_compression_header = default_compression_header
397 self._default_compression_header = default_compression_header
398
398
399 # index
399 # index
400
400
401 # 3-tuple of file handles being used for active writing.
401 # 3-tuple of file handles being used for active writing.
402 self._writinghandles = None
402 self._writinghandles = None
403
403
404 self._segmentfile = randomaccessfile.randomaccessfile(
404 self._segmentfile = randomaccessfile.randomaccessfile(
405 self.opener,
405 self.opener,
406 (self.index_file if self.inline else self.data_file),
406 (self.index_file if self.inline else self.data_file),
407 self.data_config.chunk_cache_size,
407 self.data_config.chunk_cache_size,
408 chunk_cache,
408 chunk_cache,
409 )
409 )
410 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
410 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
411 self.opener,
411 self.opener,
412 self.sidedata_file,
412 self.sidedata_file,
413 self.data_config.chunk_cache_size,
413 self.data_config.chunk_cache_size,
414 )
414 )
415
415
416 # revlog header -> revlog compressor
416 # revlog header -> revlog compressor
417 self._decompressors = {}
417 self._decompressors = {}
418 # 3-tuple of (node, rev, text) for a raw revision.
418 # 3-tuple of (node, rev, text) for a raw revision.
419 self._revisioncache = None
419 self._revisioncache = None
420
420
421 # cache some uncompressed chunks
421 # cache some uncompressed chunks
422 # rev β†’ uncompressed_chunk
422 # rev β†’ uncompressed_chunk
423 #
423 #
424 # the max cost is dynamically updated to be proportionnal to the
424 # the max cost is dynamically updated to be proportionnal to the
425 # size of revision we actually encounter.
425 # size of revision we actually encounter.
426 self._uncompressed_chunk_cache = None
426 self._uncompressed_chunk_cache = None
427 if self.data_config.uncompressed_cache_factor is not None:
427 if self.data_config.uncompressed_cache_factor is not None:
428 self._uncompressed_chunk_cache = util.lrucachedict(
428 self._uncompressed_chunk_cache = util.lrucachedict(
429 self.data_config.uncompressed_cache_count,
429 self.data_config.uncompressed_cache_count,
430 maxcost=65536, # some arbitrary initial value
430 maxcost=65536, # some arbitrary initial value
431 )
431 )
432
432
433 self._delay_buffer = None
433 self._delay_buffer = None
434
434
435 def __len__(self):
435 def __len__(self):
436 return len(self.index)
436 return len(self.index)
437
437
438 def clear_cache(self):
438 def clear_cache(self):
439 assert not self.is_delaying
439 assert not self.is_delaying
440 self._revisioncache = None
440 self._revisioncache = None
441 if self._uncompressed_chunk_cache is not None:
441 if self._uncompressed_chunk_cache is not None:
442 self._uncompressed_chunk_cache.clear()
442 self._uncompressed_chunk_cache.clear()
443 self._segmentfile.clear_cache()
443 self._segmentfile.clear_cache()
444 self._segmentfile_sidedata.clear_cache()
444 self._segmentfile_sidedata.clear_cache()
445
445
446 @property
446 @property
447 def canonical_index_file(self):
447 def canonical_index_file(self):
448 if self._orig_index_file is not None:
448 if self._orig_index_file is not None:
449 return self._orig_index_file
449 return self._orig_index_file
450 return self.index_file
450 return self.index_file
451
451
452 @property
452 @property
453 def is_delaying(self):
453 def is_delaying(self):
454 """is the revlog is currently delaying the visibility of written data?
454 """is the revlog is currently delaying the visibility of written data?
455
455
456 The delaying mechanism can be either in-memory or written on disk in a
456 The delaying mechanism can be either in-memory or written on disk in a
457 side-file."""
457 side-file."""
458 return (self._delay_buffer is not None) or (
458 return (self._delay_buffer is not None) or (
459 self._orig_index_file is not None
459 self._orig_index_file is not None
460 )
460 )
461
461
462 # Derived from index values.
462 # Derived from index values.
463
463
464 def start(self, rev):
464 def start(self, rev):
465 """the offset of the data chunk for this revision"""
465 """the offset of the data chunk for this revision"""
466 return int(self.index[rev][0] >> 16)
466 return int(self.index[rev][0] >> 16)
467
467
468 def length(self, rev):
468 def length(self, rev):
469 """the length of the data chunk for this revision"""
469 """the length of the data chunk for this revision"""
470 return self.index[rev][1]
470 return self.index[rev][1]
471
471
472 def end(self, rev):
472 def end(self, rev):
473 """the end of the data chunk for this revision"""
473 """the end of the data chunk for this revision"""
474 return self.start(rev) + self.length(rev)
474 return self.start(rev) + self.length(rev)
475
475
476 def deltaparent(self, rev):
476 def deltaparent(self, rev):
477 """return deltaparent of the given revision"""
477 """return deltaparent of the given revision"""
478 base = self.index[rev][3]
478 base = self.index[rev][3]
479 if base == rev:
479 if base == rev:
480 return nullrev
480 return nullrev
481 elif self.delta_config.general_delta:
481 elif self.delta_config.general_delta:
482 return base
482 return base
483 else:
483 else:
484 return rev - 1
484 return rev - 1
485
485
486 def issnapshot(self, rev):
486 def issnapshot(self, rev):
487 """tells whether rev is a snapshot"""
487 """tells whether rev is a snapshot"""
488 if not self.delta_config.sparse_revlog:
488 if not self.delta_config.sparse_revlog:
489 return self.deltaparent(rev) == nullrev
489 return self.deltaparent(rev) == nullrev
490 elif hasattr(self.index, 'issnapshot'):
490 elif hasattr(self.index, 'issnapshot'):
491 # directly assign the method to cache the testing and access
491 # directly assign the method to cache the testing and access
492 self.issnapshot = self.index.issnapshot
492 self.issnapshot = self.index.issnapshot
493 return self.issnapshot(rev)
493 return self.issnapshot(rev)
494 if rev == nullrev:
494 if rev == nullrev:
495 return True
495 return True
496 entry = self.index[rev]
496 entry = self.index[rev]
497 base = entry[3]
497 base = entry[3]
498 if base == rev:
498 if base == rev:
499 return True
499 return True
500 if base == nullrev:
500 if base == nullrev:
501 return True
501 return True
502 p1 = entry[5]
502 p1 = entry[5]
503 while self.length(p1) == 0:
503 while self.length(p1) == 0:
504 b = self.deltaparent(p1)
504 b = self.deltaparent(p1)
505 if b == p1:
505 if b == p1:
506 break
506 break
507 p1 = b
507 p1 = b
508 p2 = entry[6]
508 p2 = entry[6]
509 while self.length(p2) == 0:
509 while self.length(p2) == 0:
510 b = self.deltaparent(p2)
510 b = self.deltaparent(p2)
511 if b == p2:
511 if b == p2:
512 break
512 break
513 p2 = b
513 p2 = b
514 if base == p1 or base == p2:
514 if base == p1 or base == p2:
515 return False
515 return False
516 return self.issnapshot(base)
516 return self.issnapshot(base)
517
517
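
    # Illustrative sketch, not part of the module: under sparse-revlog, a
    # revision is a snapshot when it is stored as a full text or as a delta
    # against another snapshot rather than against one of its parents. The
    # `inner` name below is hypothetical.
    #
    #     if inner.issnapshot(rev):
    #         # full or intermediate snapshot: a reasonable delta base candidate
    #         ...
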
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(
                rev, stoprev, generaldelta
            )  # pytype: disable=attribute-error
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

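
    # Illustrative sketch, not part of the module: the pure-Python fallback
    # above walks index entries until it reaches a revision that is its own
    # delta base (a full snapshot) or ``stoprev``. A caller rebuilding a
    # revision typically combines it with ``_chunks``; the `inner` name is
    # hypothetical.
    #
    #     chain, stopped = inner._deltachain(rev)
    #     chunks = inner._chunks(chain)
    #     # chunks[0] is the base text unless ``stopped``; later entries are deltas
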
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t: bytes):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data: bytes) -> Tuple[bytes, bytes]:
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data: bytes):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

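
    # Illustrative sketch, not part of the module: the first byte of a stored
    # chunk selects how it is decoded. The `inner` name is hypothetical, and
    # the exact compressed output depends on the configured engine.
    #
    #     inner.compress(b'')              # -> (b'', b'')  empty data
    #     inner.compress(big_text)         # -> (b'', compressed-with-header)
    #     inner.decompress(b'u' + text)    # -> text, stored uncompressed
    #     inner.decompress(b'\0' + text)   # -> returned as-is (raw marker)
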
    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        elif self._delay_buffer is not None and self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @property
    def is_open(self):
        """True if any file handle is being held

        Used for asserts and debugging in the Python code"""
        return self._segmentfile.is_open or self._segmentfile_sidedata.is_open

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                        transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid the index referencing
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

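
    # Illustrative sketch, not part of the module: revisions may only be
    # appended while a writing context is open. The `inner`, `tr` and `docket`
    # names below are hypothetical.
    #
    #     with inner.writing(
    #         tr, data_end=docket.data_end, sidedata_end=docket.sidedata_end
    #     ):
    #         inner.write_entry(tr, entry, data, link, offset, ...)
    #
    # Re-entering is cheap: if a writing context is already open, ``writing()``
    # simply yields without reopening the files.
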
    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly; use `_writing` instead.
        """
        try:
            if self._delay_buffer is None:
                f = self.opener(
                    self.index_file,
                    mode=b"r+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                # check_ambig affects the way we open the file for writing;
                # however, here we do not actually open a file for writing, as
                # writes will be appended to a delay_buffer. So check_ambig is
                # not meaningful and is unneeded here.
                f = randomaccessfile.appender(
                    self.opener, self.index_file, b"r+", self._delay_buffer
                )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            if self._delay_buffer is None:
                return self.opener(
                    self.index_file,
                    mode=b"w+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                return randomaccessfile.appender(
                    self.opener, self.index_file, b"w+", self._delay_buffer
                )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from an inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        assert self._delay_buffer is None
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        if self._uncompressed_chunk_cache is not None:
            uncomp = self._uncompressed_chunk_cache.get(rev)
            if uncomp is not None:
                return uncomp

        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            uncomp = data
        elif compression_mode == COMP_MODE_DEFAULT:
            uncomp = self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            uncomp = self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
        return uncomp

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        fetched_revs = []
        fadd = fetched_revs.append

        chunks = []
        ladd = chunks.append

        if self._uncompressed_chunk_cache is None:
            fetched_revs = revs
        else:
            for rev in revs:
                cached_value = self._uncompressed_chunk_cache.get(rev)
                if cached_value is None:
                    fadd(rev)
                else:
                    ladd((rev, cached_value))

        if not fetched_revs:
            slicedchunks = ()
        elif not self.data_config.with_sparse_read:
            slicedchunks = (fetched_revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                fetched_revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                for rev in revschunk:
                    ladd((rev, self._chunk(rev)))
                # every revision of this group has been handled individually;
                # move on so the segment-based path below is not entered with
                # an unset `offset`/`data`.
                continue

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    c = c
                elif comp_mode == COMP_MODE_INLINE:
                    c = decomp(c)
                elif comp_mode == COMP_MODE_DEFAULT:
                    c = def_decomp(c)
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)
                ladd((rev, c))
                if self._uncompressed_chunk_cache is not None:
                    self._uncompressed_chunk_cache.insert(rev, c, len(c))

        chunks.sort()
        return [x[1] for x in chunks]

    def raw_text(self, node, rev) -> bytes:
        """return the possibly unvalidated rawtext for a revision

        returns rawtext
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory; the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        if self._uncompressed_chunk_cache is not None:
            # dynamically update the uncompressed_chunk_cache size to the
            # largest revision we saw in this revlog.
            factor = self.data_config.uncompressed_cache_factor
            candidate_size = rawsize * factor
            if candidate_size > self._uncompressed_chunk_cache.maxcost:
                self._uncompressed_chunk_cache.maxcost = candidate_size

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return rawtext

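
    # Illustrative sketch, not part of the module: ``raw_text`` rebuilds a
    # revision by patching the chain base with each delta in order, roughly:
    #
    #     chain, stopped = inner._deltachain(rev)
    #     bins = inner._chunks(chain)
    #     rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # except that the real method can also start from a cached intermediate
    # text when the delta chain walk stops at the cached revision. The `inner`
    # name is hypothetical.
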
    def sidedata(self, rev, sidedata_end):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self.inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if sidedata_end < sidedata_offset + sidedata_size:
            filename = self.sidedata_file
            end = sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def write_entry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
        index_end,
        data_end,
        sidedata_end,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if index_end is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(index_end, os.SEEK_SET)
        if dfh:
            if data_end is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(sidedata_end, os.SEEK_SET)

        curr = len(self.index) - 1
        if not self.inline:
            transaction.add(self.data_file, offset)
            if self.sidedata_file:
                transaction.add(self.sidedata_file, sidedata_offset)
            transaction.add(self.canonical_index_file, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            if self._delay_buffer is None:
                ifh.write(entry)
            else:
                self._delay_buffer.append(entry)
        elif self._delay_buffer is not None:
            msg = b'invalid delayed write on inline revlog'
            raise error.ProgrammingError(msg)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.canonical_index_file, offset)
            assert not sidedata
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        return (
            ifh.tell(),
            dfh.tell() if dfh else None,
            sdfh.tell() if sdfh else None,
        )

    def _divert_index(self):
        index_file = self.index_file
        # When we encounter a legacy inline changelog, we split it. However,
        # it is important to use the expected filename for pending content
        # (<radix>.a); otherwise hooks won't see the content of the pending
        # transaction.
        if index_file.endswith(b'.s'):
            index_file = self.index_file[:-2]
        return index_file + b'.a'

    def delay(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._delay_buffer is not None or self._orig_index_file is not None:
            # delay or divert already in place
            return None
        elif len(self.index) == 0:
            self._orig_index_file = self.index_file
            self.index_file = self._divert_index()
            assert self._orig_index_file is not None
            assert self.index_file is not None
            if self.opener.exists(self.index_file):
                self.opener.unlink(self.index_file)
            return self.index_file
        else:
            self._delay_buffer = []
            return None

    def write_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._orig_index_file is not None:
            return None, True
        any_pending = False
        pending_index_file = self._divert_index()
        if self.opener.exists(pending_index_file):
            self.opener.unlink(pending_index_file)
        util.copyfile(
            self.opener.join(self.index_file),
            self.opener.join(pending_index_file),
        )
        if self._delay_buffer:
            with self.opener(pending_index_file, b'r+') as ifh:
                ifh.seek(0, os.SEEK_END)
                ifh.write(b"".join(self._delay_buffer))
            any_pending = True
        self._delay_buffer = None
        self._orig_index_file = self.index_file
        self.index_file = pending_index_file
        return self.index_file, any_pending

    def finalize_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)

        delay = self._delay_buffer is not None
        divert = self._orig_index_file is not None

        if delay and divert:
            assert False, "unreachable"
        elif delay:
            if self._delay_buffer:
                with self.opener(self.index_file, b'r+') as ifh:
                    ifh.seek(0, os.SEEK_END)
                    ifh.write(b"".join(self._delay_buffer))
            self._delay_buffer = None
        elif divert:
            if self.opener.exists(self.index_file):
                self.opener.rename(
                    self.index_file,
                    self._orig_index_file,
                    checkambig=True,
                )
            self.index_file = self._orig_index_file
            self._orig_index_file = None
        else:
            msg = b"no delay or divert found on this revlog"
            raise error.ProgrammingError(msg)
        return self.canonical_index_file


1251 if typing.TYPE_CHECKING:
1251 if typing.TYPE_CHECKING:
1252 # Tell Pytype what kind of object we expect
1252 # Tell Pytype what kind of object we expect
1253 ProxyBase = BaseIndexObject
1253 ProxyBase = BaseIndexObject
1254 else:
1254 else:
1255 ProxyBase = object
1255 ProxyBase = object
1256
1256
1257
1257
1258 class RustIndexProxy(ProxyBase):
1258 class RustIndexProxy(ProxyBase):
1259 """Wrapper around the Rust index to fake having direct access to the index.
1259 """Wrapper around the Rust index to fake having direct access to the index.
1260
1260
1261 Rust enforces xor mutability (one mutable reference XOR 1..n non-mutable),
1261 Rust enforces xor mutability (one mutable reference XOR 1..n non-mutable),
1262 so we can't expose the index from Rust directly, since the `InnerRevlog`
1262 so we can't expose the index from Rust directly, since the `InnerRevlog`
1263 already has ownership of the index. This object redirects all calls to the
1263 already has ownership of the index. This object redirects all calls to the
1264 index through the Rust-backed `InnerRevlog` glue which defines all
1264 index through the Rust-backed `InnerRevlog` glue which defines all
1265 necessary forwarding methods.
1265 necessary forwarding methods.
1266 """
1266 """
1267
1267
1268 def __init__(self, inner):
1268 def __init__(self, inner):
1269 # Do not rename as it's being used to access the index from Rust
1269 # Do not rename as it's being used to access the index from Rust
1270 self.inner = inner
1270 self.inner = inner
1271
1271
1272 # TODO possibly write all index methods manually to save on overhead?
1272 # TODO possibly write all index methods manually to save on overhead?
1273 def __getattr__(self, name):
1273 def __getattr__(self, name):
1274 return getattr(self.inner, f"_index_{name}")
1274 return getattr(self.inner, f"_index_{name}")
1275
1275
1276 # Magic methods need to be defined explicitely
1276 # Magic methods need to be defined explicitely
1277 def __len__(self):
1277 def __len__(self):
1278 return self.inner._index___len__()
1278 return self.inner._index___len__()
1279
1279
1280 def __getitem__(self, key):
1280 def __getitem__(self, key):
1281 return self.inner._index___getitem__(key)
1281 return self.inner._index___getitem__(key)
1282
1282
1283 def __contains__(self, key):
1283 def __contains__(self, key):
1284 return self.inner._index___contains__(key)
1284 return self.inner._index___contains__(key)
1285
1285
1286 def __delitem__(self, key):
1286 def __delitem__(self, key):
1287 return self.inner._index___delitem__(key)
1287 return self.inner._index___delitem__(key)
1288
1288
1289
1289
1290 class RustVFSWrapper:
1290 class RustVFSWrapper:
1291 """Used to wrap a Python VFS to pass it to Rust to lower the overhead of
1291 """Used to wrap a Python VFS to pass it to Rust to lower the overhead of
1292 calling back multiple times into Python.
1292 calling back multiple times into Python.
1293 """
1293 """
1294
1294
1295 def __init__(self, inner):
1295 def __init__(self, inner):
1296 self.inner = inner
1296 self.inner = inner
1297
1297
1298 def __call__(
1298 def __call__(
1299 self,
1299 self,
1300 path: bytes,
1300 path: bytes,
1301 mode: bytes = b"rb",
1301 mode: bytes = b"rb",
1302 atomictemp=False,
1302 atomictemp=False,
1303 checkambig=False,
1303 checkambig=False,
1304 ):
1304 ):
1305 fd = self.inner.__call__(
1305 fd = self.inner.__call__(
1306 path=path, mode=mode, atomictemp=atomictemp, checkambig=checkambig
1306 path=path, mode=mode, atomictemp=atomictemp, checkambig=checkambig
1307 )
1307 )
1308 # Information that Rust needs to get ownership of the file that's
1308 # Information that Rust needs to get ownership of the file that's
1309 # being opened.
1309 # being opened.
1310 return (os.dup(fd.fileno()), fd._tempname if atomictemp else None)
1310 return (os.dup(fd.fileno()), fd._tempname if atomictemp else None)
1311
1311
1312 def __getattr__(self, name):
1312 def __getattr__(self, name):
1313 return getattr(self.inner, name)
1313 return getattr(self.inner, name)
1314
1314
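# Hedged sketch of the descriptor hand-off performed in
# RustVFSWrapper.__call__ above: os.dup() gives the Rust side a file
# descriptor it owns and may close independently of the Python file object,
# so neither side can invalidate the other's handle.
import os
import tempfile

with tempfile.TemporaryFile() as _example_fp:
    _example_fd = os.dup(_example_fp.fileno())
    os.close(_example_fd)  # closing the duplicate leaves _example_fp usable
    _example_fp.write(b"still writable")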
1315
1315
1316 class revlog:
1316 class revlog:
1317 """
1317 """
1318 the underlying revision storage object
1318 the underlying revision storage object
1319
1319
1320 A revlog consists of two parts, an index and the revision data.
1320 A revlog consists of two parts, an index and the revision data.
1321
1321
1322 The index is a file with a fixed record size containing
1322 The index is a file with a fixed record size containing
1323 information on each revision, including its nodeid (hash), the
1323 information on each revision, including its nodeid (hash), the
1324 nodeids of its parents, the position and offset of its data within
1324 nodeids of its parents, the position and offset of its data within
1325 the data file, and the revision it's based on. Finally, each entry
1325 the data file, and the revision it's based on. Finally, each entry
1326 contains a linkrev entry that can serve as a pointer to external
1326 contains a linkrev entry that can serve as a pointer to external
1327 data.
1327 data.
1328
1328
1329 The revision data itself is a linear collection of data chunks.
1329 The revision data itself is a linear collection of data chunks.
1330 Each chunk represents a revision and is usually represented as a
1330 Each chunk represents a revision and is usually represented as a
1331 delta against the previous chunk. To bound lookup time, runs of
1331 delta against the previous chunk. To bound lookup time, runs of
1332 deltas are limited to about 2 times the length of the original
1332 deltas are limited to about 2 times the length of the original
1333 version data. This makes retrieval of a version proportional to
1333 version data. This makes retrieval of a version proportional to
1334 its size, or O(1) relative to the number of revisions.
1334 its size, or O(1) relative to the number of revisions.
1335
1335
1336 Both pieces of the revlog are written to in an append-only
1336 Both pieces of the revlog are written to in an append-only
1337 fashion, which means we never need to rewrite a file to insert or
1337 fashion, which means we never need to rewrite a file to insert or
1338 remove data, and can use some simple techniques to avoid the need
1338 remove data, and can use some simple techniques to avoid the need
1339 for locking while reading.
1339 for locking while reading.
1340
1340
1341 If checkambig, indexfile is opened with checkambig=True at
1341 If checkambig, indexfile is opened with checkambig=True at
1342 writing, to avoid file stat ambiguity.
1342 writing, to avoid file stat ambiguity.
1343
1343
1344 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1344 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1345 index will be mmapped rather than read if it is larger than the
1345 index will be mmapped rather than read if it is larger than the
1346 configured threshold.
1346 configured threshold.
1347
1347
1348 If censorable is True, the revlog can have censored revisions.
1348 If censorable is True, the revlog can have censored revisions.
1349
1349
1350 If `upperboundcomp` is not None, this is the expected maximal gain from
1350 If `upperboundcomp` is not None, this is the expected maximal gain from
1351 compression for the data content.
1351 compression for the data content.
1352
1352
1353 `concurrencychecker` is an optional function that receives 3 arguments: a
1353 `concurrencychecker` is an optional function that receives 3 arguments: a
1354 file handle, a filename, and an expected position. It should check whether
1354 file handle, a filename, and an expected position. It should check whether
1355 the current position in the file handle is valid, and log/warn/fail (by
1355 the current position in the file handle is valid, and log/warn/fail (by
1356 raising).
1356 raising).
1357
1357
1358 See mercurial/revlogutils/constants.py for details about the content of an
1358 See mercurial/revlogutils/constants.py for details about the content of an
1359 index entry.
1359 index entry.
1360 """
1360 """
1361
1361
1362 _flagserrorclass = error.RevlogError
1362 _flagserrorclass = error.RevlogError
1363 _inner: "_InnerRevlog"
1363 _inner: "_InnerRevlog"
1364
1364
1365 opener: vfsmod.vfs
1365 opener: vfsmod.vfs
1366
1366
1367 @staticmethod
1367 @staticmethod
1368 def is_inline_index(header_bytes):
1368 def is_inline_index(header_bytes):
1369 """Determine if a revlog is inline from the initial bytes of the index"""
1369 """Determine if a revlog is inline from the initial bytes of the index"""
1370 if len(header_bytes) == 0:
1370 if len(header_bytes) == 0:
1371 return True
1371 return True
1372
1372
1373 header = INDEX_HEADER.unpack(header_bytes)[0]
1373 header = INDEX_HEADER.unpack(header_bytes)[0]
1374
1374
1375 _format_flags = header & ~0xFFFF
1375 _format_flags = header & ~0xFFFF
1376 _format_version = header & 0xFFFF
1376 _format_version = header & 0xFFFF
1377
1377
1378 features = FEATURES_BY_VERSION[_format_version]
1378 features = FEATURES_BY_VERSION[_format_version]
1379 return features[b'inline'](_format_flags)
1379 return features[b'inline'](_format_flags)
1380
1380
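# Hedged illustration of the header decoding in is_inline_index() above:
# assuming the usual ">I" layout of INDEX_HEADER, the index header is a
# big-endian 32-bit word whose low 16 bits carry the format version and
# whose high 16 bits carry feature flags such as "inline".
import struct

def _split_header_example(header_bytes):
    (header,) = struct.unpack(">I", header_bytes[:4])
    return header & ~0xFFFF, header & 0xFFFF  # (format flags, format version)

assert _split_header_example(b"\x00\x01\x00\x01") == (0x00010000, 1)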
1381 _docket_file: Optional[bytes]
1381 _docket_file: Optional[bytes]
1382
1382
1383 def __init__(
1383 def __init__(
1384 self,
1384 self,
1385 opener: vfsmod.vfs,
1385 opener: vfsmod.vfs,
1386 target,
1386 target,
1387 radix,
1387 radix,
1388 postfix=None, # only exists for `tmpcensored` now
1388 postfix=None, # only exists for `tmpcensored` now
1389 checkambig=False,
1389 checkambig=False,
1390 mmaplargeindex=False,
1390 mmaplargeindex=False,
1391 censorable=False,
1391 censorable=False,
1392 upperboundcomp=None,
1392 upperboundcomp=None,
1393 persistentnodemap=False,
1393 persistentnodemap=False,
1394 concurrencychecker=None,
1394 concurrencychecker=None,
1395 trypending=False,
1395 trypending=False,
1396 try_split=False,
1396 try_split=False,
1397 canonical_parent_order=True,
1397 canonical_parent_order=True,
1398 data_config=None,
1398 data_config=None,
1399 delta_config=None,
1399 delta_config=None,
1400 feature_config=None,
1400 feature_config=None,
1401 may_inline=True, # may inline new revlog
1401 may_inline=True, # may inline new revlog
1402 ):
1402 ):
1403 """
1403 """
1404 create a revlog object
1404 create a revlog object
1405
1405
1406 opener is a function that abstracts the file opening operation
1406 opener is a function that abstracts the file opening operation
1407 and can be used to implement COW semantics or the like.
1407 and can be used to implement COW semantics or the like.
1408
1408
1409 `target`: a (KIND, ID) tuple that identifies the content stored in
1409 `target`: a (KIND, ID) tuple that identifies the content stored in
1410 this revlog. It helps the rest of the code to understand what the revlog
1410 this revlog. It helps the rest of the code to understand what the revlog
1411 is about without having to resort to heuristics and index filename
1411 is about without having to resort to heuristics and index filename
1412 analysis. Note that this must reliably be set by normal code, but
1412 analysis. Note that this must reliably be set by normal code, but
1413 test, debug, or performance measurement code might not set this to an
1413 test, debug, or performance measurement code might not set this to an
1414 accurate value.
1414 accurate value.
1415 """
1415 """
1416
1416
1417 self.radix = radix
1417 self.radix = radix
1418
1418
1419 self._docket_file = None
1419 self._docket_file = None
1420 self._indexfile = None
1420 self._indexfile = None
1421 self._datafile = None
1421 self._datafile = None
1422 self._sidedatafile = None
1422 self._sidedatafile = None
1423 self._nodemap_file = None
1423 self._nodemap_file = None
1424 self.postfix = postfix
1424 self.postfix = postfix
1425 self._trypending = trypending
1425 self._trypending = trypending
1426 self._try_split = try_split
1426 self._try_split = try_split
1427 self._may_inline = may_inline
1427 self._may_inline = may_inline
1428 self.uses_rust = False
1428 self.uses_rust = False
1429 self.opener = opener
1429 self.opener = opener
1430 if persistentnodemap:
1430 if persistentnodemap:
1431 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1431 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1432
1432
1433 assert target[0] in ALL_KINDS
1433 assert target[0] in ALL_KINDS
1434 assert len(target) == 2
1434 assert len(target) == 2
1435 self.target = target
1435 self.target = target
1436 if feature_config is not None:
1436 if feature_config is not None:
1437 self.feature_config = feature_config.copy()
1437 self.feature_config = feature_config.copy()
1438 elif b'feature-config' in self.opener.options:
1438 elif b'feature-config' in self.opener.options:
1439 self.feature_config = self.opener.options[b'feature-config'].copy()
1439 self.feature_config = self.opener.options[b'feature-config'].copy()
1440 else:
1440 else:
1441 self.feature_config = FeatureConfig()
1441 self.feature_config = FeatureConfig()
1442 self.feature_config.censorable = censorable
1442 self.feature_config.censorable = censorable
1443 self.feature_config.canonical_parent_order = canonical_parent_order
1443 self.feature_config.canonical_parent_order = canonical_parent_order
1444 if data_config is not None:
1444 if data_config is not None:
1445 self.data_config = data_config.copy()
1445 self.data_config = data_config.copy()
1446 elif b'data-config' in self.opener.options:
1446 elif b'data-config' in self.opener.options:
1447 self.data_config = self.opener.options[b'data-config'].copy()
1447 self.data_config = self.opener.options[b'data-config'].copy()
1448 else:
1448 else:
1449 self.data_config = DataConfig()
1449 self.data_config = DataConfig()
1450 self.data_config.check_ambig = checkambig
1450 self.data_config.check_ambig = checkambig
1451 self.data_config.mmap_large_index = mmaplargeindex
1451 self.data_config.mmap_large_index = mmaplargeindex
1452 if delta_config is not None:
1452 if delta_config is not None:
1453 self.delta_config = delta_config.copy()
1453 self.delta_config = delta_config.copy()
1454 elif b'delta-config' in self.opener.options:
1454 elif b'delta-config' in self.opener.options:
1455 self.delta_config = self.opener.options[b'delta-config'].copy()
1455 self.delta_config = self.opener.options[b'delta-config'].copy()
1456 else:
1456 else:
1457 self.delta_config = DeltaConfig()
1457 self.delta_config = DeltaConfig()
1458 self.delta_config.upper_bound_comp = upperboundcomp
1458 self.delta_config.upper_bound_comp = upperboundcomp
1459
1459
1460 # Maps rev to chain base rev.
1460 # Maps rev to chain base rev.
1461 self._chainbasecache = util.lrucachedict(100)
1461 self._chainbasecache = util.lrucachedict(100)
1462
1462
1463 self.index: Optional[BaseIndexObject] = None
1463 self.index: Optional[BaseIndexObject] = None
1464 self._docket = None
1464 self._docket = None
1465 self._nodemap_docket = None
1465 self._nodemap_docket = None
1466 # Mapping of partial identifiers to full nodes.
1466 # Mapping of partial identifiers to full nodes.
1467 self._pcache = {}
1467 self._pcache = {}
1468
1468
1469 # other optional features
1469 # other optional features
1470
1470
1471 # Make copy of flag processors so each revlog instance can support
1471 # Make copy of flag processors so each revlog instance can support
1472 # custom flags.
1472 # custom flags.
1473 self._flagprocessors = dict(flagutil.flagprocessors)
1473 self._flagprocessors = dict(flagutil.flagprocessors)
1474 # prevent nesting of addgroup
1474 # prevent nesting of addgroup
1475 self._adding_group = None
1475 self._adding_group = None
1476
1476
1477 index, chunk_cache = self._loadindex()
1477 index, chunk_cache = self._loadindex()
1478 self._load_inner(index, chunk_cache)
1478 self._load_inner(index, chunk_cache)
1479 self._concurrencychecker = concurrencychecker
1479 self._concurrencychecker = concurrencychecker
1480
1480
1481 def _init_opts(self):
1481 def _init_opts(self):
1482 """process options (from above/config) to setup associated default revlog mode
1482 """process options (from above/config) to setup associated default revlog mode
1483
1483
1484 These values might be adjusted when the on-disk information is actually read.
1484 These values might be adjusted when the on-disk information is actually read.
1485
1485
1486 The relevant values are returned for use in _loadindex().
1486 The relevant values are returned for use in _loadindex().
1487
1487
1488 * newversionflags:
1488 * newversionflags:
1489 version header to use if we need to create a new revlog
1489 version header to use if we need to create a new revlog
1490
1490
1491 * mmapindexthreshold:
1491 * mmapindexthreshold:
1492 minimal index size at which to start using mmap
1492 minimal index size at which to start using mmap
1493
1493
1494 * force_nodemap:
1494 * force_nodemap:
1495 force the usage of a "development" version of the nodemap code
1495 force the usage of a "development" version of the nodemap code
1496 """
1496 """
1497 opts = self.opener.options
1497 opts = self.opener.options
1498
1498
1499 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1499 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1500 new_header = CHANGELOGV2
1500 new_header = CHANGELOGV2
1501 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1501 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1502 self.feature_config.compute_rank = compute_rank
1502 self.feature_config.compute_rank = compute_rank
1503 elif b'revlogv2' in opts:
1503 elif b'revlogv2' in opts:
1504 new_header = REVLOGV2
1504 new_header = REVLOGV2
1505 elif b'revlogv1' in opts:
1505 elif b'revlogv1' in opts:
1506 new_header = REVLOGV1
1506 new_header = REVLOGV1
1507 if self._may_inline:
1507 if self._may_inline:
1508 new_header |= FLAG_INLINE_DATA
1508 new_header |= FLAG_INLINE_DATA
1509 if b'generaldelta' in opts:
1509 if b'generaldelta' in opts:
1510 new_header |= FLAG_GENERALDELTA
1510 new_header |= FLAG_GENERALDELTA
1511 elif b'revlogv0' in self.opener.options:
1511 elif b'revlogv0' in self.opener.options:
1512 new_header = REVLOGV0
1512 new_header = REVLOGV0
1513 else:
1513 else:
1514 new_header = REVLOG_DEFAULT_VERSION
1514 new_header = REVLOG_DEFAULT_VERSION
1515
1515
1516 mmapindexthreshold = None
1516 mmapindexthreshold = None
1517 if self.data_config.mmap_large_index:
1517 if self.data_config.mmap_large_index:
1518 mmapindexthreshold = self.data_config.mmap_index_threshold
1518 mmapindexthreshold = self.data_config.mmap_index_threshold
1519 if self.feature_config.enable_ellipsis:
1519 if self.feature_config.enable_ellipsis:
1520 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1520 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1521
1521
1522 # revlog v0 doesn't have flag processors
1522 # revlog v0 doesn't have flag processors
1523 for flag, processor in opts.get(b'flagprocessors', {}).items():
1523 for flag, processor in opts.get(b'flagprocessors', {}).items():
1524 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1524 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1525
1525
1526 chunk_cache_size = self.data_config.chunk_cache_size
1526 chunk_cache_size = self.data_config.chunk_cache_size
1527 if chunk_cache_size <= 0:
1527 if chunk_cache_size <= 0:
1528 raise error.RevlogError(
1528 raise error.RevlogError(
1529 _(b'revlog chunk cache size %r is not greater than 0')
1529 _(b'revlog chunk cache size %r is not greater than 0')
1530 % chunk_cache_size
1530 % chunk_cache_size
1531 )
1531 )
1532 elif chunk_cache_size & (chunk_cache_size - 1):
1532 elif chunk_cache_size & (chunk_cache_size - 1):
1533 raise error.RevlogError(
1533 raise error.RevlogError(
1534 _(b'revlog chunk cache size %r is not a power of 2')
1534 _(b'revlog chunk cache size %r is not a power of 2')
1535 % chunk_cache_size
1535 % chunk_cache_size
1536 )
1536 )
1537 force_nodemap = opts.get(b'devel-force-nodemap', False)
1537 force_nodemap = opts.get(b'devel-force-nodemap', False)
1538 return new_header, mmapindexthreshold, force_nodemap
1538 return new_header, mmapindexthreshold, force_nodemap
1539
1539
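# Quick illustration of the power-of-two check used in _init_opts() above:
# for a positive integer n, n is a power of two exactly when
# n & (n - 1) == 0, which is why a chunk cache size of 65536 passes while
# 65535 is rejected.
def _is_power_of_two_example(n):
    return n > 0 and (n & (n - 1)) == 0

assert _is_power_of_two_example(65536)
assert not _is_power_of_two_example(65535)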
1540 def _get_data(self, filepath, mmap_threshold, size=None):
1540 def _get_data(self, filepath, mmap_threshold, size=None):
1541 """return a file content with or without mmap
1541 """return a file content with or without mmap
1542
1542
1543 If the file is missing return the empty string"""
1543 If the file is missing return the empty string"""
1544 try:
1544 try:
1545 with self.opener(filepath) as fp:
1545 with self.opener(filepath) as fp:
1546 if mmap_threshold is not None:
1546 if mmap_threshold is not None:
1547 file_size = self.opener.fstat(fp).st_size
1547 file_size = self.opener.fstat(fp).st_size
1548 if (
1548 if (
1549 file_size >= mmap_threshold
1549 file_size >= mmap_threshold
1550 and self.opener.is_mmap_safe(filepath)
1550 and self.opener.is_mmap_safe(filepath)
1551 ):
1551 ):
1552 if size is not None:
1552 if size is not None:
1553 # avoid potential mmap crash
1553 # avoid potential mmap crash
1554 size = min(file_size, size)
1554 size = min(file_size, size)
1555 # TODO: should .close() to release resources without
1555 # TODO: should .close() to release resources without
1556 # relying on Python GC
1556 # relying on Python GC
1557 if size is None:
1557 if size is None:
1558 return util.buffer(util.mmapread(fp))
1558 return util.buffer(util.mmapread(fp))
1559 else:
1559 else:
1560 return util.buffer(util.mmapread(fp, size))
1560 return util.buffer(util.mmapread(fp, size))
1561 if size is None:
1561 if size is None:
1562 return fp.read()
1562 return fp.read()
1563 else:
1563 else:
1564 return fp.read(size)
1564 return fp.read(size)
1565 except FileNotFoundError:
1565 except FileNotFoundError:
1566 return b''
1566 return b''
1567
1567
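# Hedged sketch of the control flow in _get_data() above: files below the
# mmap threshold (or unsafe to map) are read into memory, larger safe ones
# are memory-mapped. The helper only mirrors the decision, not the VFS calls.
def _read_or_mmap_example(file_size, mmap_threshold, mmap_safe):
    if mmap_threshold is not None and file_size >= mmap_threshold and mmap_safe:
        return "mmap"
    return "read"

assert _read_or_mmap_example(1 << 20, 64 << 10, True) == "mmap"
assert _read_or_mmap_example(4 << 10, 64 << 10, True) == "read"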
1568 def get_streams(self, max_linkrev, force_inline=False):
1568 def get_streams(self, max_linkrev, force_inline=False):
1569 """return a list of streams that represent this revlog
1569 """return a list of streams that represent this revlog
1570
1570
1571 This is used by stream-clone to do bytes to bytes copies of a repository.
1571 This is used by stream-clone to do bytes to bytes copies of a repository.
1572
1572
1573 This streams data for all revisions that refer to a changelog revision up
1573 This streams data for all revisions that refer to a changelog revision up
1574 to `max_linkrev`.
1574 to `max_linkrev`.
1575
1575
1576 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1576 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1577
1577
1578 It returns a list of three-tuples:
1578 It returns a list of three-tuples:
1579
1579
1580 [
1580 [
1581 (filename, bytes_stream, stream_size),
1581 (filename, bytes_stream, stream_size),
1582 …
1582 …
1583 ]
1583 ]
1584 """
1584 """
1585 n = len(self)
1585 n = len(self)
1586 index = self.index
1586 index = self.index
1587 while n > 0:
1587 while n > 0:
1588 linkrev = index[n - 1][4]
1588 linkrev = index[n - 1][4]
1589 if linkrev < max_linkrev:
1589 if linkrev < max_linkrev:
1590 break
1590 break
1591 # note: this loop will rarely go through multiple iterations, since
1591 # note: this loop will rarely go through multiple iterations, since
1592 # it only traverses commits created during the current streaming
1592 # it only traverses commits created during the current streaming
1593 # pull operation.
1593 # pull operation.
1594 #
1594 #
1595 # If this becomes a problem, using a binary search should cap the
1595 # If this becomes a problem, using a binary search should cap the
1596 # runtime of this.
1596 # runtime of this.
1597 n = n - 1
1597 n = n - 1
1598 if n == 0:
1598 if n == 0:
1599 # no data to send
1599 # no data to send
1600 return []
1600 return []
1601 index_size = n * index.entry_size
1601 index_size = n * index.entry_size
1602 data_size = self.end(n - 1)
1602 data_size = self.end(n - 1)
1603
1603
1604 # XXX we might have been split (or stripped) since the object was
1604 # XXX we might have been split (or stripped) since the object was
1605 # initialized. We need to close this race too, e.g. by having a way to
1605 # initialized. We need to close this race too, e.g. by having a way to
1606 # pre-open the files we feed to the revlog and never closing them before
1606 # pre-open the files we feed to the revlog and never closing them before
1607 # we are done streaming.
1607 # we are done streaming.
1608
1608
1609 if self._inline:
1609 if self._inline:
1610
1610
1611 def get_stream():
1611 def get_stream():
1612 with self.opener(self._indexfile, mode=b"r") as fp:
1612 with self.opener(self._indexfile, mode=b"r") as fp:
1613 yield None
1613 yield None
1614 size = index_size + data_size
1614 size = index_size + data_size
1615 if size <= 65536:
1615 if size <= 65536:
1616 yield fp.read(size)
1616 yield fp.read(size)
1617 else:
1617 else:
1618 yield from util.filechunkiter(fp, limit=size)
1618 yield from util.filechunkiter(fp, limit=size)
1619
1619
1620 inline_stream = get_stream()
1620 inline_stream = get_stream()
1621 next(inline_stream)
1621 next(inline_stream)
1622 return [
1622 return [
1623 (self._indexfile, inline_stream, index_size + data_size),
1623 (self._indexfile, inline_stream, index_size + data_size),
1624 ]
1624 ]
1625 elif force_inline:
1625 elif force_inline:
1626
1626
1627 def get_stream():
1627 def get_stream():
1628 with self.reading():
1628 with self.reading():
1629 yield None
1629 yield None
1630
1630
1631 for rev in range(n):
1631 for rev in range(n):
1632 idx = self.index.entry_binary(rev)
1632 idx = self.index.entry_binary(rev)
1633 if rev == 0 and self._docket is None:
1633 if rev == 0 and self._docket is None:
1634 # re-inject the inline flag
1634 # re-inject the inline flag
1635 header = self._format_flags
1635 header = self._format_flags
1636 header |= self._format_version
1636 header |= self._format_version
1637 header |= FLAG_INLINE_DATA
1637 header |= FLAG_INLINE_DATA
1638 header = self.index.pack_header(header)
1638 header = self.index.pack_header(header)
1639 idx = header + idx
1639 idx = header + idx
1640 yield idx
1640 yield idx
1641 yield self._inner.get_segment_for_revs(rev, rev)[1]
1641 yield self._inner.get_segment_for_revs(rev, rev)[1]
1642
1642
1643 inline_stream = get_stream()
1643 inline_stream = get_stream()
1644 next(inline_stream)
1644 next(inline_stream)
1645 return [
1645 return [
1646 (self._indexfile, inline_stream, index_size + data_size),
1646 (self._indexfile, inline_stream, index_size + data_size),
1647 ]
1647 ]
1648 else:
1648 else:
1649
1649
1650 def get_index_stream():
1650 def get_index_stream():
1651 with self.opener(self._indexfile, mode=b"r") as fp:
1651 with self.opener(self._indexfile, mode=b"r") as fp:
1652 yield None
1652 yield None
1653 if index_size <= 65536:
1653 if index_size <= 65536:
1654 yield fp.read(index_size)
1654 yield fp.read(index_size)
1655 else:
1655 else:
1656 yield from util.filechunkiter(fp, limit=index_size)
1656 yield from util.filechunkiter(fp, limit=index_size)
1657
1657
1658 def get_data_stream():
1658 def get_data_stream():
1659 with self._datafp() as fp:
1659 with self._datafp() as fp:
1660 yield None
1660 yield None
1661 if data_size <= 65536:
1661 if data_size <= 65536:
1662 yield fp.read(data_size)
1662 yield fp.read(data_size)
1663 else:
1663 else:
1664 yield from util.filechunkiter(fp, limit=data_size)
1664 yield from util.filechunkiter(fp, limit=data_size)
1665
1665
1666 index_stream = get_index_stream()
1666 index_stream = get_index_stream()
1667 next(index_stream)
1667 next(index_stream)
1668 data_stream = get_data_stream()
1668 data_stream = get_data_stream()
1669 next(data_stream)
1669 next(data_stream)
1670 return [
1670 return [
1671 (self._datafile, data_stream, data_size),
1671 (self._datafile, data_stream, data_size),
1672 (self._indexfile, index_stream, index_size),
1672 (self._indexfile, index_stream, index_size),
1673 ]
1673 ]
1674
1674
1675 def _loadindex(self, docket=None):
1675 def _loadindex(self, docket=None):
1676 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1676 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1677
1677
1678 if self.postfix is not None:
1678 if self.postfix is not None:
1679 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1679 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1680 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1680 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1681 entry_point = b'%s.i.a' % self.radix
1681 entry_point = b'%s.i.a' % self.radix
1682 elif self._try_split and self.opener.exists(self._split_index_file):
1682 elif self._try_split and self.opener.exists(self._split_index_file):
1683 entry_point = self._split_index_file
1683 entry_point = self._split_index_file
1684 else:
1684 else:
1685 entry_point = b'%s.i' % self.radix
1685 entry_point = b'%s.i' % self.radix
1686
1686
1687 if docket is not None:
1687 if docket is not None:
1688 self._docket = docket
1688 self._docket = docket
1689 self._docket_file = entry_point
1689 self._docket_file = entry_point
1690 else:
1690 else:
1691 self._initempty = True
1691 self._initempty = True
1692 entry_data = self._get_data(entry_point, mmapindexthreshold)
1692 entry_data = self._get_data(entry_point, mmapindexthreshold)
1693 if len(entry_data) > 0:
1693 if len(entry_data) > 0:
1694 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1694 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1695 self._initempty = False
1695 self._initempty = False
1696 else:
1696 else:
1697 header = new_header
1697 header = new_header
1698
1698
1699 self._format_flags = header & ~0xFFFF
1699 self._format_flags = header & ~0xFFFF
1700 self._format_version = header & 0xFFFF
1700 self._format_version = header & 0xFFFF
1701
1701
1702 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1702 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1703 if supported_flags is None:
1703 if supported_flags is None:
1704 msg = _(b'unknown version (%d) in revlog %s')
1704 msg = _(b'unknown version (%d) in revlog %s')
1705 msg %= (self._format_version, self.display_id)
1705 msg %= (self._format_version, self.display_id)
1706 raise error.RevlogError(msg)
1706 raise error.RevlogError(msg)
1707 elif self._format_flags & ~supported_flags:
1707 elif self._format_flags & ~supported_flags:
1708 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1708 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1709 display_flag = self._format_flags >> 16
1709 display_flag = self._format_flags >> 16
1710 msg %= (display_flag, self._format_version, self.display_id)
1710 msg %= (display_flag, self._format_version, self.display_id)
1711 raise error.RevlogError(msg)
1711 raise error.RevlogError(msg)
1712
1712
1713 features = FEATURES_BY_VERSION[self._format_version]
1713 features = FEATURES_BY_VERSION[self._format_version]
1714 self._inline = features[b'inline'](self._format_flags)
1714 self._inline = features[b'inline'](self._format_flags)
1715 self.delta_config.general_delta = features[b'generaldelta'](
1715 self.delta_config.general_delta = features[b'generaldelta'](
1716 self._format_flags
1716 self._format_flags
1717 )
1717 )
1718 self.data_config.generaldelta = self.delta_config.general_delta
1718 self.data_config.generaldelta = self.delta_config.general_delta
1719 self.feature_config.has_side_data = features[b'sidedata']
1719 self.feature_config.has_side_data = features[b'sidedata']
1720
1720
1721 if not features[b'docket']:
1721 if not features[b'docket']:
1722 self._indexfile = entry_point
1722 self._indexfile = entry_point
1723 index_data = entry_data
1723 index_data = entry_data
1724 else:
1724 else:
1725 self._docket_file = entry_point
1725 self._docket_file = entry_point
1726 if self._initempty:
1726 if self._initempty:
1727 self._docket = docketutil.default_docket(self, header)
1727 self._docket = docketutil.default_docket(self, header)
1728 else:
1728 else:
1729 self._docket = docketutil.parse_docket(
1729 self._docket = docketutil.parse_docket(
1730 self, entry_data, use_pending=self._trypending
1730 self, entry_data, use_pending=self._trypending
1731 )
1731 )
1732
1732
1733 if self._docket is not None:
1733 if self._docket is not None:
1734 self._indexfile = self._docket.index_filepath()
1734 self._indexfile = self._docket.index_filepath()
1735 index_data = b''
1735 index_data = b''
1736 index_size = self._docket.index_end
1736 index_size = self._docket.index_end
1737 if index_size > 0:
1737 if index_size > 0:
1738 index_data = self._get_data(
1738 index_data = self._get_data(
1739 self._indexfile, mmapindexthreshold, size=index_size
1739 self._indexfile, mmapindexthreshold, size=index_size
1740 )
1740 )
1741 if len(index_data) < index_size:
1741 if len(index_data) < index_size:
1742 msg = _(b'too few index data for %s: got %d, expected %d')
1742 msg = _(b'too few index data for %s: got %d, expected %d')
1743 msg %= (self.display_id, len(index_data), index_size)
1743 msg %= (self.display_id, len(index_data), index_size)
1744 raise error.RevlogError(msg)
1744 raise error.RevlogError(msg)
1745
1745
1746 self._inline = False
1746 self._inline = False
1747 # generaldelta implied by version 2 revlogs.
1747 # generaldelta implied by version 2 revlogs.
1748 self.delta_config.general_delta = True
1748 self.delta_config.general_delta = True
1749 self.data_config.generaldelta = True
1749 self.data_config.generaldelta = True
1750 # the logic for persistent nodemap will be dealt with within the
1750 # the logic for persistent nodemap will be dealt with within the
1751 # main docket, so disable it for now.
1751 # main docket, so disable it for now.
1752 self._nodemap_file = None
1752 self._nodemap_file = None
1753
1753
1754 if self._docket is not None:
1754 if self._docket is not None:
1755 self._datafile = self._docket.data_filepath()
1755 self._datafile = self._docket.data_filepath()
1756 self._sidedatafile = self._docket.sidedata_filepath()
1756 self._sidedatafile = self._docket.sidedata_filepath()
1757 elif self.postfix is None:
1757 elif self.postfix is None:
1758 self._datafile = b'%s.d' % self.radix
1758 self._datafile = b'%s.d' % self.radix
1759 else:
1759 else:
1760 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1760 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1761
1761
1762 self.nodeconstants = sha1nodeconstants
1762 self.nodeconstants = sha1nodeconstants
1763 self.nullid = self.nodeconstants.nullid
1763 self.nullid = self.nodeconstants.nullid
1764
1764
1765 # sparse-revlog can't be on without general-delta (issue6056)
1765 # sparse-revlog can't be on without general-delta (issue6056)
1766 if not self.delta_config.general_delta:
1766 if not self.delta_config.general_delta:
1767 self.delta_config.sparse_revlog = False
1767 self.delta_config.sparse_revlog = False
1768
1768
1769 self._storedeltachains = True
1769 self._storedeltachains = True
1770
1770
1771 devel_nodemap = (
1771 devel_nodemap = (
1772 self._nodemap_file
1772 self._nodemap_file
1773 and force_nodemap
1773 and force_nodemap
1774 and parse_index_v1_nodemap is not None
1774 and parse_index_v1_nodemap is not None
1775 )
1775 )
1776
1776
1777 use_rust_index = False
1777 use_rust_index = False
1778 rust_applicable = self._nodemap_file is not None
1778 rust_applicable = self._nodemap_file is not None
1779 rust_applicable = rust_applicable or self.target[0] == KIND_FILELOG
1779 rust_applicable = rust_applicable or self.target[0] == KIND_FILELOG
1780 rust_applicable = rust_applicable and getattr(
1780 rust_applicable = rust_applicable and getattr(
1781 self.opener, "rust_compatible", True
1781 self.opener, "rust_compatible", True
1782 )
1782 )
1783 if rustrevlog is not None and rust_applicable:
1783 if rustrevlog is not None and rust_applicable:
1784 # we would like to use the rust_index in all cases, especially
1784 # we would like to use the rust_index in all cases, especially
1785 # because it is necessary for AncestorsIterator and LazyAncestors
1785 # because it is necessary for AncestorsIterator and LazyAncestors
1786 # since the 6.7 cycle.
1786 # since the 6.7 cycle.
1787 #
1787 #
1788 # However, the performance impact of unconditionally building the
1788 # However, the performance impact of unconditionally building the
1789 # nodemap is currently a problem for non-persistent nodemap
1789 # nodemap is currently a problem for non-persistent nodemap
1790 # repositories.
1790 # repositories.
1791 use_rust_index = True
1791 use_rust_index = True
1792
1792
1793 if self._format_version != REVLOGV1:
1793 if self._format_version != REVLOGV1:
1794 use_rust_index = False
1794 use_rust_index = False
1795
1795
1796 if hasattr(self.opener, "fncache"):
1796 if hasattr(self.opener, "fncache"):
1797 vfs = self.opener.vfs
1797 vfs = self.opener.vfs
1798 if not self.opener.uses_dotencode:
1798 if not self.opener.uses_dotencode:
1799 use_rust_index = False
1799 use_rust_index = False
1800 if not isinstance(vfs, vfsmod.vfs):
1800 if not isinstance(vfs, vfsmod.vfs):
1801 # Be cautious since we don't support other vfs
1801 # Be cautious since we don't support other vfs
1802 use_rust_index = False
1802 use_rust_index = False
1803 else:
1803 else:
1804 # Rust only supports repos with fncache
1804 # Rust only supports repos with fncache
1805 use_rust_index = False
1805 use_rust_index = False
1806
1806
1807 self._parse_index = parse_index_v1
1807 self._parse_index = parse_index_v1
1808 if self._format_version == REVLOGV0:
1808 if self._format_version == REVLOGV0:
1809 self._parse_index = revlogv0.parse_index_v0
1809 self._parse_index = revlogv0.parse_index_v0
1810 elif self._format_version == REVLOGV2:
1810 elif self._format_version == REVLOGV2:
1811 self._parse_index = parse_index_v2
1811 self._parse_index = parse_index_v2
1812 elif self._format_version == CHANGELOGV2:
1812 elif self._format_version == CHANGELOGV2:
1813 self._parse_index = parse_index_cl_v2
1813 self._parse_index = parse_index_cl_v2
1814 elif devel_nodemap:
1814 elif devel_nodemap:
1815 self._parse_index = parse_index_v1_nodemap
1815 self._parse_index = parse_index_v1_nodemap
1816
1816
1817 if use_rust_index:
1817 if use_rust_index:
1818 # Let the Rust code parse its own index
1818 # Let the Rust code parse its own index
1819 index, chunkcache = (index_data, None)
1819 index, chunkcache = (index_data, None)
1820 self.uses_rust = True
1820 self.uses_rust = True
1821 else:
1821 else:
1822 try:
1822 try:
1823 d = self._parse_index(index_data, self._inline)
1823 d = self._parse_index(index_data, self._inline)
1824 index, chunkcache = d
1824 index, chunkcache = d
1825 self._register_nodemap_info(index)
1825 self._register_nodemap_info(index)
1826 except (ValueError, IndexError):
1826 except (ValueError, IndexError):
1827 raise error.RevlogError(
1827 raise error.RevlogError(
1828 _(b"index %s is corrupted") % self.display_id
1828 _(b"index %s is corrupted") % self.display_id
1829 )
1829 )
1830 # revnum -> (chain-length, sum-delta-length)
1830 # revnum -> (chain-length, sum-delta-length)
1831 self._chaininfocache = util.lrucachedict(500)
1831 self._chaininfocache = util.lrucachedict(500)
1832
1832
1833 return index, chunkcache
1833 return index, chunkcache
1834
1834
1835 def _load_inner(self, index, chunk_cache):
1835 def _load_inner(self, index, chunk_cache):
1836 if self._docket is None:
1836 if self._docket is None:
1837 default_compression_header = None
1837 default_compression_header = None
1838 else:
1838 else:
1839 default_compression_header = self._docket.default_compression_header
1839 default_compression_header = self._docket.default_compression_header
1840
1840
1841 if self.uses_rust:
1841 if self.uses_rust:
1842 vfs_is_readonly = False
1842 vfs_is_readonly = False
1843 fncache = None
1843 fncache = None
1844
1844
1845 if hasattr(self.opener, "vfs"):
1845 if hasattr(self.opener, "vfs"):
1846 vfs = self.opener
1846 vfs = self.opener
1847 if isinstance(vfs, vfsmod.readonlyvfs):
1847 if isinstance(vfs, vfsmod.readonlyvfs):
1848 vfs_is_readonly = True
1848 vfs_is_readonly = True
1849 vfs = vfs.vfs
1849 vfs = vfs.vfs
1850 fncache = vfs.fncache
1850 fncache = vfs.fncache
1851 vfs = vfs.vfs
1851 vfs = vfs.vfs
1852 else:
1852 else:
1853 vfs = self.opener
1853 vfs = self.opener
1854
1854
1855 vfs_base = vfs.base
1855 vfs_base = vfs.base
1856 assert fncache is not None, "Rust only supports repos with fncache"
1856 assert fncache is not None, "Rust only supports repos with fncache"
1857
1857
1858 self._inner = rustrevlog.InnerRevlog(
1858 self._inner = rustrevlog.InnerRevlog(
1859 vfs_base=vfs_base,
1859 vfs_base=vfs_base,
1860 fncache=fncache,
1860 fncache=fncache,
1861 vfs_is_readonly=vfs_is_readonly,
1861 vfs_is_readonly=vfs_is_readonly,
1862 index_data=index,
1862 index_data=index,
1863 index_file=self._indexfile,
1863 index_file=self._indexfile,
1864 data_file=self._datafile,
1864 data_file=self._datafile,
1865 sidedata_file=self._sidedatafile,
1865 sidedata_file=self._sidedatafile,
1866 inline=self._inline,
1866 inline=self._inline,
1867 data_config=self.data_config,
1867 data_config=self.data_config,
1868 delta_config=self.delta_config,
1868 delta_config=self.delta_config,
1869 feature_config=self.feature_config,
1869 feature_config=self.feature_config,
1870 chunk_cache=chunk_cache,
1870 chunk_cache=chunk_cache,
1871 default_compression_header=default_compression_header,
1871 default_compression_header=default_compression_header,
1872 revlog_type=self.target[0],
1872 revlog_type=self.target[0],
1873 use_persistent_nodemap=self._nodemap_file is not None,
1873 use_persistent_nodemap=self._nodemap_file is not None,
1874 )
1874 )
1875 self.index = RustIndexProxy(self._inner)
1875 self.index = RustIndexProxy(self._inner)
1876 self._register_nodemap_info(self.index)
1876 self._register_nodemap_info(self.index)
1877 self.uses_rust = True
1877 self.uses_rust = True
1878 else:
1878 else:
1879 self._inner = _InnerRevlog(
1879 self._inner = _InnerRevlog(
1880 opener=self.opener,
1880 opener=self.opener,
1881 index=index,
1881 index=index,
1882 index_file=self._indexfile,
1882 index_file=self._indexfile,
1883 data_file=self._datafile,
1883 data_file=self._datafile,
1884 sidedata_file=self._sidedatafile,
1884 sidedata_file=self._sidedatafile,
1885 inline=self._inline,
1885 inline=self._inline,
1886 data_config=self.data_config,
1886 data_config=self.data_config,
1887 delta_config=self.delta_config,
1887 delta_config=self.delta_config,
1888 feature_config=self.feature_config,
1888 feature_config=self.feature_config,
1889 chunk_cache=chunk_cache,
1889 chunk_cache=chunk_cache,
1890 default_compression_header=default_compression_header,
1890 default_compression_header=default_compression_header,
1891 )
1891 )
1892 self.index = self._inner.index
1892 self.index = self._inner.index
1893
1893
1894 def _register_nodemap_info(self, index):
1894 def _register_nodemap_info(self, index):
1895 use_nodemap = (
1895 use_nodemap = (
1896 not self._inline
1896 not self._inline
1897 and self._nodemap_file is not None
1897 and self._nodemap_file is not None
1898 and hasattr(index, 'update_nodemap_data')
1898 and hasattr(index, 'update_nodemap_data')
1899 )
1899 )
1900 if use_nodemap:
1900 if use_nodemap:
1901 nodemap_data = nodemaputil.persisted_data(self)
1901 nodemap_data = nodemaputil.persisted_data(self)
1902 if nodemap_data is not None:
1902 if nodemap_data is not None:
1903 docket = nodemap_data[0]
1903 docket = nodemap_data[0]
1904 if (
1904 if (
1905 len(index) > docket.tip_rev
1905 len(index) > docket.tip_rev
1906 and index[docket.tip_rev][7] == docket.tip_node
1906 and index[docket.tip_rev][7] == docket.tip_node
1907 ):
1907 ):
1908 # no changelog tampering
1908 # no changelog tampering
1909 self._nodemap_docket = docket
1909 self._nodemap_docket = docket
1910 index.update_nodemap_data(
1910 index.update_nodemap_data(
1911 *nodemap_data
1911 *nodemap_data
1912 ) # pytype: disable=attribute-error
1912 ) # pytype: disable=attribute-error
1913
1913
1914 def get_revlog(self):
1914 def get_revlog(self):
1915 """simple function to mirror API of other not-really-revlog API"""
1915 """simple function to mirror API of other not-really-revlog API"""
1916 return self
1916 return self
1917
1917
1918 @util.propertycache
1918 @util.propertycache
1919 def revlog_kind(self):
1919 def revlog_kind(self):
1920 return self.target[0]
1920 return self.target[0]
1921
1921
1922 @util.propertycache
1922 @util.propertycache
1923 def display_id(self):
1923 def display_id(self):
1924 """The public facing "ID" of the revlog that we use in message"""
1924 """The public facing "ID" of the revlog that we use in message"""
1925 if self.revlog_kind == KIND_FILELOG:
1925 if self.revlog_kind == KIND_FILELOG:
1926 # Reference the file without the "data/" prefix, so it is familiar
1926 # Reference the file without the "data/" prefix, so it is familiar
1927 # to the user.
1927 # to the user.
1928 return self.target[1]
1928 return self.target[1]
1929 else:
1929 else:
1930 return self.radix
1930 return self.radix
1931
1931
1932 def _datafp(self, mode=b'r'):
1932 def _datafp(self, mode=b'r'):
1933 """file object for the revlog's data file"""
1933 """file object for the revlog's data file"""
1934 return self.opener(self._datafile, mode=mode)
1934 return self.opener(self._datafile, mode=mode)
1935
1935
1936 def tiprev(self):
1936 def tiprev(self):
1937 return len(self.index) - 1
1937 return len(self.index) - 1
1938
1938
1939 def tip(self):
1939 def tip(self):
1940 return self.node(self.tiprev())
1940 return self.node(self.tiprev())
1941
1941
1942 def __contains__(self, rev):
1942 def __contains__(self, rev):
1943 return 0 <= rev < len(self)
1943 return 0 <= rev < len(self)
1944
1944
1945 def __len__(self):
1945 def __len__(self):
1946 return len(self.index)
1946 return len(self.index)
1947
1947
1948 def __iter__(self) -> Iterator[int]:
1948 def __iter__(self) -> Iterator[int]:
1949 return iter(range(len(self)))
1949 return iter(range(len(self)))
1950
1950
1951 def revs(self, start=0, stop=None):
1951 def revs(self, start=0, stop=None):
1952 """iterate over all rev in this revlog (from start to stop)"""
1952 """iterate over all rev in this revlog (from start to stop)"""
1953 return storageutil.iterrevs(len(self), start=start, stop=stop)
1953 return storageutil.iterrevs(len(self), start=start, stop=stop)
1954
1954
1955 def hasnode(self, node):
1955 def hasnode(self, node):
1956 try:
1956 try:
1957 self.rev(node)
1957 self.rev(node)
1958 return True
1958 return True
1959 except KeyError:
1959 except KeyError:
1960 return False
1960 return False
1961
1961
1962 def _candelta(self, baserev, rev):
1962 def _candelta(self, baserev, rev):
1963 """whether two revisions (baserev, rev) can be delta-ed or not"""
1963 """whether two revisions (baserev, rev) can be delta-ed or not"""
1964 # Disable delta if either rev requires a content-changing flag
1964 # Disable delta if either rev requires a content-changing flag
1965 # processor (ex. LFS). This is because such flag processor can alter
1965 # processor (ex. LFS). This is because such flag processor can alter
1966 # the rawtext content that the delta will be based on, and two clients
1966 # the rawtext content that the delta will be based on, and two clients
1967 # could have a same revlog node with different flags (i.e. different
1967 # could have a same revlog node with different flags (i.e. different
1968 # rawtext contents) and the delta could be incompatible.
1968 # rawtext contents) and the delta could be incompatible.
1969 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1969 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1970 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1970 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1971 ):
1971 ):
1972 return False
1972 return False
1973 return True
1973 return True
1974
1974
1975 def update_caches(self, transaction):
1975 def update_caches(self, transaction):
1976 """update on disk cache
1976 """update on disk cache
1977
1977
1978 If a transaction is passed, the update may be delayed to transaction
1978 If a transaction is passed, the update may be delayed to transaction
1979 commit."""
1979 commit."""
1980 if self._nodemap_file is not None:
1980 if self._nodemap_file is not None:
1981 if transaction is None:
1981 if transaction is None:
1982 nodemaputil.update_persistent_nodemap(self)
1982 nodemaputil.update_persistent_nodemap(self)
1983 else:
1983 else:
1984 nodemaputil.setup_persistent_nodemap(transaction, self)
1984 nodemaputil.setup_persistent_nodemap(transaction, self)
1985
1985
1986 def clearcaches(self, clear_persisted_data: bool = False) -> None:
1986 def clearcaches(self, clear_persisted_data: bool = False) -> None:
1987 """Clear in-memory caches"""
1987 """Clear in-memory caches"""
1988 self._chainbasecache.clear()
1988 self._chainbasecache.clear()
1989 self._inner.clear_cache()
1989 self._inner.clear_cache()
1990 self._pcache = {}
1990 self._pcache = {}
1991 self._nodemap_docket = None
1991 self._nodemap_docket = None
1992 self.index.clearcaches()
1992 self.index.clearcaches()
1993 # The Python code is the one responsible for validating the docket, so we
1993 # The Python code is the one responsible for validating the docket, so we
1994 # end up having to refresh it here.
1994 # end up having to refresh it here.
1995 use_nodemap = (
1995 use_nodemap = (
1996 not self._inline
1996 not self._inline
1997 and self._nodemap_file is not None
1997 and self._nodemap_file is not None
1998 and hasattr(self.index, 'update_nodemap_data')
1998 and hasattr(self.index, 'update_nodemap_data')
1999 )
1999 )
2000 if use_nodemap:
2000 if use_nodemap:
2001 nodemap_data = nodemaputil.persisted_data(self)
2001 nodemap_data = nodemaputil.persisted_data(self)
2002 if nodemap_data is not None:
2002 if nodemap_data is not None:
2003 self._nodemap_docket = nodemap_data[0]
2003 self._nodemap_docket = nodemap_data[0]
2004 self.index.update_nodemap_data(
2004 self.index.update_nodemap_data(
2005 *nodemap_data
2005 *nodemap_data
2006 ) # pytype: disable=attribute-error
2006 ) # pytype: disable=attribute-error
2007
2007
2008 def rev(self, node):
2008 def rev(self, node):
2009 """return the revision number associated with a <nodeid>"""
2009 """return the revision number associated with a <nodeid>"""
2010 try:
2010 try:
2011 return self.index.rev(node)
2011 return self.index.rev(node)
2012 except TypeError:
2012 except TypeError:
2013 raise
2013 raise
2014 except error.RevlogError:
2014 except error.RevlogError:
2015 # parsers.c radix tree lookup failed
2015 # parsers.c radix tree lookup failed
2016 if (
2016 if (
2017 node == self.nodeconstants.wdirid
2017 node == self.nodeconstants.wdirid
2018 or node in self.nodeconstants.wdirfilenodeids
2018 or node in self.nodeconstants.wdirfilenodeids
2019 ):
2019 ):
2020 raise error.WdirUnsupported
2020 raise error.WdirUnsupported
2021 raise error.LookupError(node, self.display_id, _(b'no node'))
2021 raise error.LookupError(node, self.display_id, _(b'no node'))
2022
2022
2023 # Accessors for index entries.
2023 # Accessors for index entries.
2024
2024
2025 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
2025 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
2026 # are flags.
2026 # are flags.
2027 def start(self, rev):
2027 def start(self, rev):
2028 return int(self.index[rev][0] >> 16)
2028 return int(self.index[rev][0] >> 16)
2029
2029
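# Worked example of the packing described in the comment above: the first
# index field is a 64-bit value whose upper 48 bits (6 bytes) hold the data
# offset and whose lower 16 bits (2 bytes) hold the revision flags, hence
# the >> 16 in start() and the & 0xFFFF in flags(). The numbers below are
# illustrative only.
_example_offset_and_flags = (1234 << 16) | 0x0001
assert _example_offset_and_flags >> 16 == 1234  # offset, as in start()
assert _example_offset_and_flags & 0xFFFF == 0x0001  # flags, as in flags()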
2030 def sidedata_cut_off(self, rev):
2030 def sidedata_cut_off(self, rev):
2031 sd_cut_off = self.index[rev][8]
2031 sd_cut_off = self.index[rev][8]
2032 if sd_cut_off != 0:
2032 if sd_cut_off != 0:
2033 return sd_cut_off
2033 return sd_cut_off
2034 # This is some annoying dance, because entries without sidedata
2034 # This is some annoying dance, because entries without sidedata
2035 # currently use 0 as their offset. (instead of previous-offset +
2035 # currently use 0 as their offset. (instead of previous-offset +
2036 # previous-size)
2036 # previous-size)
2037 #
2037 #
2038 # We should reconsider this sidedata → 0 sidedata_offset policy.
2038 # We should reconsider this sidedata → 0 sidedata_offset policy.
2039 # In the meantime, we need this.
2039 # In the meantime, we need this.
2040 while 0 <= rev:
2040 while 0 <= rev:
2041 e = self.index[rev]
2041 e = self.index[rev]
2042 if e[9] != 0:
2042 if e[9] != 0:
2043 return e[8] + e[9]
2043 return e[8] + e[9]
2044 rev -= 1
2044 rev -= 1
2045 return 0
2045 return 0
2046
2046
2047 def flags(self, rev):
2047 def flags(self, rev):
2048 return self.index[rev][0] & 0xFFFF
2048 return self.index[rev][0] & 0xFFFF
2049
2049
2050 def length(self, rev):
2050 def length(self, rev):
2051 return self.index[rev][1]
2051 return self.index[rev][1]
2052
2052
2053 def sidedata_length(self, rev):
2053 def sidedata_length(self, rev):
2054 if not self.feature_config.has_side_data:
2054 if not self.feature_config.has_side_data:
2055 return 0
2055 return 0
2056 return self.index[rev][9]
2056 return self.index[rev][9]
2057
2057
2058 def rawsize(self, rev):
2058 def rawsize(self, rev):
2059 """return the length of the uncompressed text for a given revision"""
2059 """return the length of the uncompressed text for a given revision"""
2060 l = self.index[rev][2]
2060 l = self.index[rev][2]
2061 if l >= 0:
2061 if l >= 0:
2062 return l
2062 return l
2063
2063
2064 t = self.rawdata(rev)
2064 t = self.rawdata(rev)
2065 return len(t)
2065 return len(t)
2066
2066
2067 def size(self, rev):
2067 def size(self, rev):
2068 """length of non-raw text (processed by a "read" flag processor)"""
2068 """length of non-raw text (processed by a "read" flag processor)"""
2069 # fast path: if no "read" flag processor could change the content,
2069 # fast path: if no "read" flag processor could change the content,
2070 # size is rawsize. note: ELLIPSIS is known to not change the content.
2070 # size is rawsize. note: ELLIPSIS is known to not change the content.
2071 flags = self.flags(rev)
2071 flags = self.flags(rev)
2072 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
2072 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
2073 return self.rawsize(rev)
2073 return self.rawsize(rev)
2074
2074
2075 return len(self.revision(rev))
2075 return len(self.revision(rev))
2076
2076
2077 def fast_rank(self, rev):
2077 def fast_rank(self, rev):
2078 """Return the rank of a revision if already known, or None otherwise.
2078 """Return the rank of a revision if already known, or None otherwise.
2079
2079
2080 The rank of a revision is the size of the sub-graph it defines as a
2080 The rank of a revision is the size of the sub-graph it defines as a
2081 head. Equivalently, the rank of a revision `r` is the size of the set
2081 head. Equivalently, the rank of a revision `r` is the size of the set
2082 `ancestors(r)`, `r` included.
2082 `ancestors(r)`, `r` included.
2083
2083
2084 This method returns the rank retrieved from the revlog in constant
2084 This method returns the rank retrieved from the revlog in constant
2085 time. It makes no attempt at computing unknown values for versions of
2085 time. It makes no attempt at computing unknown values for versions of
2086 the revlog which do not persist the rank.
2086 the revlog which do not persist the rank.
2087 """
2087 """
2088 rank = self.index[rev][ENTRY_RANK]
2088 rank = self.index[rev][ENTRY_RANK]
2089 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
2089 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
2090 return None
2090 return None
2091 if rev == nullrev:
2091 if rev == nullrev:
2092 return 0 # convention
2092 return 0 # convention
2093 return rank
2093 return rank
2094
2094
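# Hedged illustration of the "rank" documented in fast_rank() above: for a
# small hand-built DAG mapping rev -> parents, the rank of r is the size of
# ancestors(r) with r included (here -1 plays the role of nullrev). This
# brute-force helper is for illustration only; fast_rank() merely reads a
# value persisted by changelogv2 indexes.
def _rank_example(parents, rev):
    seen = set()
    stack = [rev]
    while stack:
        r = stack.pop()
        if r != -1 and r not in seen:
            seen.add(r)
            stack.extend(parents[r])
    return len(seen)

# 3 merges 1 and 2, which both descend from 0: ancestors(3) = {0, 1, 2, 3}
assert _rank_example({0: [-1], 1: [0], 2: [0], 3: [1, 2]}, 3) == 4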
2095 def chainbase(self, rev):
2095 def chainbase(self, rev):
2096 base = self._chainbasecache.get(rev)
2096 base = self._chainbasecache.get(rev)
2097 if base is not None:
2097 if base is not None:
2098 return base
2098 return base
2099
2099
2100 index = self.index
2100 index = self.index
2101 iterrev = rev
2101 iterrev = rev
2102 base = index[iterrev][3]
2102 base = index[iterrev][3]
2103 while base != iterrev:
2103 while base != iterrev:
2104 iterrev = base
2104 iterrev = base
2105 base = index[iterrev][3]
2105 base = index[iterrev][3]
2106
2106
2107 self._chainbasecache[rev] = base
2107 self._chainbasecache[rev] = base
2108 return base
2108 return base
2109
2109
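# Hedged sketch of the walk in chainbase() above: field 3 of an index entry
# points at the revision's delta base, and the chain base is reached when a
# revision is its own base. The dict below stands in for index[rev][3].
def _chainbase_example(base_of, rev):
    while base_of[rev] != rev:
        rev = base_of[rev]
    return rev

# rev 2 deltas against 1, which deltas against the full-text base 0
assert _chainbase_example({0: 0, 1: 0, 2: 1}, 2) == 0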
2110 def linkrev(self, rev):
2110 def linkrev(self, rev):
2111 return self.index[rev][4]
2111 return self.index[rev][4]
2112
2112
2113 def parentrevs(self, rev):
2113 def parentrevs(self, rev):
2114 try:
2114 try:
2115 entry = self.index[rev]
2115 entry = self.index[rev]
2116 except IndexError:
2116 except IndexError:
2117 if rev == wdirrev:
2117 if rev == wdirrev:
2118 raise error.WdirUnsupported
2118 raise error.WdirUnsupported
2119 raise
2119 raise
2120
2120
2121 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2121 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2122 return entry[6], entry[5]
2122 return entry[6], entry[5]
2123 else:
2123 else:
2124 return entry[5], entry[6]
2124 return entry[5], entry[6]
2125
2125
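# Hedged illustration of the canonical parent order applied in parentrevs()
# above: when enabled and the first stored parent is null, the pair is
# reported swapped so that the null parent always comes second. Here -1
# stands in for nullrev.
def _canonical_order_example(p1, p2, canonical=True, nullrev=-1):
    if canonical and p1 == nullrev:
        return p2, p1
    return p1, p2

assert _canonical_order_example(-1, 5) == (5, -1)
assert _canonical_order_example(4, -1) == (4, -1)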
2126 # fast parentrevs(rev) where rev isn't filtered
2126 # fast parentrevs(rev) where rev isn't filtered
2127 _uncheckedparentrevs = parentrevs
2127 _uncheckedparentrevs = parentrevs
2128
2128
2129 def node(self, rev):
2129 def node(self, rev):
2130 try:
2130 try:
2131 return self.index[rev][7]
2131 return self.index[rev][7]
2132 except IndexError:
2132 except IndexError:
2133 if rev == wdirrev:
2133 if rev == wdirrev:
2134 raise error.WdirUnsupported
2134 raise error.WdirUnsupported
2135 raise
2135 raise
2136
2136
2137 # Derived from index values.
2137 # Derived from index values.
2138
2138
2139 def end(self, rev):
2139 def end(self, rev):
2140 return self.start(rev) + self.length(rev)
2140 return self.start(rev) + self.length(rev)
2141
2141
2142 def parents(self, node):
2142 def parents(self, node):
2143 i = self.index
2143 i = self.index
2144 d = i[self.rev(node)]
2144 d = i[self.rev(node)]
2145 # inline node() to avoid function call overhead
2145 # inline node() to avoid function call overhead
2146 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2146 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2147 return i[d[6]][7], i[d[5]][7]
2147 return i[d[6]][7], i[d[5]][7]
2148 else:
2148 else:
2149 return i[d[5]][7], i[d[6]][7]
2149 return i[d[5]][7], i[d[6]][7]
2150
2150
2151 def chainlen(self, rev):
2151 def chainlen(self, rev):
2152 return self._chaininfo(rev)[0]
2152 return self._chaininfo(rev)[0]
2153
2153
2154 def _chaininfo(self, rev):
2154 def _chaininfo(self, rev):
2155 chaininfocache = self._chaininfocache
2155 chaininfocache = self._chaininfocache
2156 if rev in chaininfocache:
2156 if rev in chaininfocache:
2157 return chaininfocache[rev]
2157 return chaininfocache[rev]
2158 index = self.index
2158 index = self.index
2159 generaldelta = self.delta_config.general_delta
2159 generaldelta = self.delta_config.general_delta
2160 iterrev = rev
2160 iterrev = rev
2161 e = index[iterrev]
2161 e = index[iterrev]
2162 clen = 0
2162 clen = 0
2163 compresseddeltalen = 0
2163 compresseddeltalen = 0
2164 while iterrev != e[3]:
2164 while iterrev != e[3]:
2165 clen += 1
2165 clen += 1
2166 compresseddeltalen += e[1]
2166 compresseddeltalen += e[1]
2167 if generaldelta:
2167 if generaldelta:
2168 iterrev = e[3]
2168 iterrev = e[3]
2169 else:
2169 else:
2170 iterrev -= 1
2170 iterrev -= 1
2171 if iterrev in chaininfocache:
2171 if iterrev in chaininfocache:
2172 t = chaininfocache[iterrev]
2172 t = chaininfocache[iterrev]
2173 clen += t[0]
2173 clen += t[0]
2174 compresseddeltalen += t[1]
2174 compresseddeltalen += t[1]
2175 break
2175 break
2176 e = index[iterrev]
2176 e = index[iterrev]
2177 else:
2177 else:
2178 # Add text length of base since decompressing that also takes
2178 # Add text length of base since decompressing that also takes
2179 # work. For cache hits the length is already included.
2179 # work. For cache hits the length is already included.
2180 compresseddeltalen += e[1]
2180 compresseddeltalen += e[1]
2181 r = (clen, compresseddeltalen)
2181 r = (clen, compresseddeltalen)
2182 chaininfocache[rev] = r
2182 chaininfocache[rev] = r
2183 return r
2183 return r
2184
2184
2185 def _deltachain(self, rev, stoprev=None):
2185 def _deltachain(self, rev, stoprev=None):
2186 return self._inner._deltachain(rev, stoprev=stoprev)
2186 return self._inner._deltachain(rev, stoprev=stoprev)
2187
2187
2188 def ancestors(self, revs, stoprev=0, inclusive=False):
2188 def ancestors(self, revs, stoprev=0, inclusive=False):
2189 """Generate the ancestors of 'revs' in reverse revision order.
2189 """Generate the ancestors of 'revs' in reverse revision order.
2190 Does not generate revs lower than stoprev.
2190 Does not generate revs lower than stoprev.
2191
2191
2192 See the documentation for ancestor.lazyancestors for more details."""
2192 See the documentation for ancestor.lazyancestors for more details."""
2193
2193
2194 # first, make sure start revisions aren't filtered
2194 # first, make sure start revisions aren't filtered
2195 revs = list(revs)
2195 revs = list(revs)
2196 checkrev = self.node
2196 checkrev = self.node
2197 for r in revs:
2197 for r in revs:
2198 checkrev(r)
2198 checkrev(r)
2199 # and we're sure ancestors aren't filtered as well
2199 # and we're sure ancestors aren't filtered as well
2200
2200
2201 if rustancestor is not None and self.index.rust_ext_compat:
2201 if rustancestor is not None and self.index.rust_ext_compat:
2202 lazyancestors = rustancestor.LazyAncestors
2202 lazyancestors = rustancestor.LazyAncestors
2203 arg = self.index
2203 arg = self.index
2204 else:
2204 else:
2205 lazyancestors = ancestor.lazyancestors
2205 lazyancestors = ancestor.lazyancestors
2206 arg = self._uncheckedparentrevs
2206 arg = self._uncheckedparentrevs
2207 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2207 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2208
2208
2209 def descendants(self, revs):
2209 def descendants(self, revs):
2210 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2210 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2211
2211
2212 def findcommonmissing(self, common=None, heads=None):
2212 def findcommonmissing(self, common=None, heads=None):
2213 """Return a tuple of the ancestors of common and the ancestors of heads
2213 """Return a tuple of the ancestors of common and the ancestors of heads
2214 that are not ancestors of common. In revset terminology, we return the
2214 that are not ancestors of common. In revset terminology, we return the
2215 tuple:
2215 tuple:
2216
2216
2217 ::common, (::heads) - (::common)
2217 ::common, (::heads) - (::common)
2218
2218
2219 The list is sorted by revision number, meaning it is
2219 The list is sorted by revision number, meaning it is
2220 topologically sorted.
2220 topologically sorted.
2221
2221
2222 'heads' and 'common' are both lists of node IDs. If heads is
2222 'heads' and 'common' are both lists of node IDs. If heads is
2223 not supplied, uses all of the revlog's heads. If common is not
2223 not supplied, uses all of the revlog's heads. If common is not
2224 supplied, uses nullid."""
2224 supplied, uses nullid."""
2225 if common is None:
2225 if common is None:
2226 common = [self.nullid]
2226 common = [self.nullid]
2227 if heads is None:
2227 if heads is None:
2228 heads = self.heads()
2228 heads = self.heads()
2229
2229
2230 common = [self.rev(n) for n in common]
2230 common = [self.rev(n) for n in common]
2231 heads = [self.rev(n) for n in heads]
2231 heads = [self.rev(n) for n in heads]
2232
2232
2233 # we want the ancestors, but inclusive
2233 # we want the ancestors, but inclusive
2234 class lazyset:
2234 class lazyset:
2235 def __init__(self, lazyvalues):
2235 def __init__(self, lazyvalues):
2236 self.addedvalues = set()
2236 self.addedvalues = set()
2237 self.lazyvalues = lazyvalues
2237 self.lazyvalues = lazyvalues
2238
2238
2239 def __contains__(self, value):
2239 def __contains__(self, value):
2240 return value in self.addedvalues or value in self.lazyvalues
2240 return value in self.addedvalues or value in self.lazyvalues
2241
2241
2242 def __iter__(self):
2242 def __iter__(self):
2243 added = self.addedvalues
2243 added = self.addedvalues
2244 for r in added:
2244 for r in added:
2245 yield r
2245 yield r
2246 for r in self.lazyvalues:
2246 for r in self.lazyvalues:
2247 if r not in added:
2247 if r not in added:
2248 yield r
2248 yield r
2249
2249
2250 def add(self, value):
2250 def add(self, value):
2251 self.addedvalues.add(value)
2251 self.addedvalues.add(value)
2252
2252
2253 def update(self, values):
2253 def update(self, values):
2254 self.addedvalues.update(values)
2254 self.addedvalues.update(values)
2255
2255
2256 has = lazyset(self.ancestors(common))
2256 has = lazyset(self.ancestors(common))
2257 has.add(nullrev)
2257 has.add(nullrev)
2258 has.update(common)
2258 has.update(common)
2259
2259
2260 # take all ancestors from heads that aren't in has
2260 # take all ancestors from heads that aren't in has
2261 missing = set()
2261 missing = set()
2262 visit = collections.deque(r for r in heads if r not in has)
2262 visit = collections.deque(r for r in heads if r not in has)
2263 while visit:
2263 while visit:
2264 r = visit.popleft()
2264 r = visit.popleft()
2265 if r in missing:
2265 if r in missing:
2266 continue
2266 continue
2267 else:
2267 else:
2268 missing.add(r)
2268 missing.add(r)
2269 for p in self.parentrevs(r):
2269 for p in self.parentrevs(r):
2270 if p not in has:
2270 if p not in has:
2271 visit.append(p)
2271 visit.append(p)
2272 missing = list(missing)
2272 missing = list(missing)
2273 missing.sort()
2273 missing.sort()
2274 return has, [self.node(miss) for miss in missing]
2274 return has, [self.node(miss) for miss in missing]
2275
2275
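# Illustrative sketch, not part of this module: the BFS in the second half of
# findcommonmissing above, restated over a hypothetical ``parents`` mapping
# (rev -> tuple of parent revs, with -1 standing in for the null revision).
import collections

def toy_missing(parents, has, heads):
    """Revs reachable from ``heads`` that are not in ``has``."""
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in parents[r]:
            if p not in has and p != -1:
                visit.append(p)
    return sorted(missing)

# linear history 0-1-2-3:
# toy_missing({0: (-1,), 1: (0,), 2: (1,), 3: (2,)}, {-1, 0, 1}, [3]) == [2, 3]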
2276 def incrementalmissingrevs(self, common=None):
2276 def incrementalmissingrevs(self, common=None):
2277 """Return an object that can be used to incrementally compute the
2277 """Return an object that can be used to incrementally compute the
2278 revision numbers of the ancestors of arbitrary sets that are not
2278 revision numbers of the ancestors of arbitrary sets that are not
2279 ancestors of common. This is an ancestor.incrementalmissingancestors
2279 ancestors of common. This is an ancestor.incrementalmissingancestors
2280 object.
2280 object.
2281
2281
2282 'common' is a list of revision numbers. If common is not supplied, uses
2282 'common' is a list of revision numbers. If common is not supplied, uses
2283 nullrev.
2283 nullrev.
2284 """
2284 """
2285 if common is None:
2285 if common is None:
2286 common = [nullrev]
2286 common = [nullrev]
2287
2287
2288 if rustancestor is not None and self.index.rust_ext_compat:
2288 if rustancestor is not None and self.index.rust_ext_compat:
2289 return rustancestor.MissingAncestors(self.index, common)
2289 return rustancestor.MissingAncestors(self.index, common)
2290 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2290 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2291
2291
2292 def findmissingrevs(self, common=None, heads=None):
2292 def findmissingrevs(self, common=None, heads=None):
2293 """Return the revision numbers of the ancestors of heads that
2293 """Return the revision numbers of the ancestors of heads that
2294 are not ancestors of common.
2294 are not ancestors of common.
2295
2295
2296 More specifically, return a list of revision numbers corresponding to
2296 More specifically, return a list of revision numbers corresponding to
2297 nodes N such that every N satisfies the following constraints:
2297 nodes N such that every N satisfies the following constraints:
2298
2298
2299 1. N is an ancestor of some node in 'heads'
2299 1. N is an ancestor of some node in 'heads'
2300 2. N is not an ancestor of any node in 'common'
2300 2. N is not an ancestor of any node in 'common'
2301
2301
2302 The list is sorted by revision number, meaning it is
2302 The list is sorted by revision number, meaning it is
2303 topologically sorted.
2303 topologically sorted.
2304
2304
2305 'heads' and 'common' are both lists of revision numbers. If heads is
2305 'heads' and 'common' are both lists of revision numbers. If heads is
2306 not supplied, uses all of the revlog's heads. If common is not
2306 not supplied, uses all of the revlog's heads. If common is not
2307 supplied, uses nullrev."""
2307 supplied, uses nullrev."""
2308 if common is None:
2308 if common is None:
2309 common = [nullrev]
2309 common = [nullrev]
2310 if heads is None:
2310 if heads is None:
2311 heads = self.headrevs()
2311 heads = self.headrevs()
2312
2312
2313 inc = self.incrementalmissingrevs(common=common)
2313 inc = self.incrementalmissingrevs(common=common)
2314 return inc.missingancestors(heads)
2314 return inc.missingancestors(heads)
2315
2315
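# Illustrative usage sketch (hypothetical ``rl`` revlog instance): the
# incremental object lets callers resolve several head sets against the same
# ``common`` without recomputing the ancestor set of ``common`` from scratch.
#
#   inc = rl.incrementalmissingrevs(common=[10, 42])
#   first = inc.missingancestors([50, 51])   # revs missing for these heads
#   later = inc.missingancestors([60])       # reuses the state built above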
2316 def findmissing(self, common=None, heads=None):
2316 def findmissing(self, common=None, heads=None):
2317 """Return the ancestors of heads that are not ancestors of common.
2317 """Return the ancestors of heads that are not ancestors of common.
2318
2318
2319 More specifically, return a list of nodes N such that every N
2319 More specifically, return a list of nodes N such that every N
2320 satisfies the following constraints:
2320 satisfies the following constraints:
2321
2321
2322 1. N is an ancestor of some node in 'heads'
2322 1. N is an ancestor of some node in 'heads'
2323 2. N is not an ancestor of any node in 'common'
2323 2. N is not an ancestor of any node in 'common'
2324
2324
2325 The list is sorted by revision number, meaning it is
2325 The list is sorted by revision number, meaning it is
2326 topologically sorted.
2326 topologically sorted.
2327
2327
2328 'heads' and 'common' are both lists of node IDs. If heads is
2328 'heads' and 'common' are both lists of node IDs. If heads is
2329 not supplied, uses all of the revlog's heads. If common is not
2329 not supplied, uses all of the revlog's heads. If common is not
2330 supplied, uses nullid."""
2330 supplied, uses nullid."""
2331 if common is None:
2331 if common is None:
2332 common = [self.nullid]
2332 common = [self.nullid]
2333 if heads is None:
2333 if heads is None:
2334 heads = self.heads()
2334 heads = self.heads()
2335
2335
2336 common = [self.rev(n) for n in common]
2336 common = [self.rev(n) for n in common]
2337 heads = [self.rev(n) for n in heads]
2337 heads = [self.rev(n) for n in heads]
2338
2338
2339 inc = self.incrementalmissingrevs(common=common)
2339 inc = self.incrementalmissingrevs(common=common)
2340 return [self.node(r) for r in inc.missingancestors(heads)]
2340 return [self.node(r) for r in inc.missingancestors(heads)]
2341
2341
2342 def nodesbetween(self, roots=None, heads=None):
2342 def nodesbetween(self, roots=None, heads=None):
2343 """Return a topological path from 'roots' to 'heads'.
2343 """Return a topological path from 'roots' to 'heads'.
2344
2344
2345 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2345 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2346 topologically sorted list of all nodes N that satisfy both of
2346 topologically sorted list of all nodes N that satisfy both of
2347 these constraints:
2347 these constraints:
2348
2348
2349 1. N is a descendant of some node in 'roots'
2349 1. N is a descendant of some node in 'roots'
2350 2. N is an ancestor of some node in 'heads'
2350 2. N is an ancestor of some node in 'heads'
2351
2351
2352 Every node is considered to be both a descendant and an ancestor
2352 Every node is considered to be both a descendant and an ancestor
2353 of itself, so every reachable node in 'roots' and 'heads' will be
2353 of itself, so every reachable node in 'roots' and 'heads' will be
2354 included in 'nodes'.
2354 included in 'nodes'.
2355
2355
2356 'outroots' is the list of reachable nodes in 'roots', i.e., the
2356 'outroots' is the list of reachable nodes in 'roots', i.e., the
2357 subset of 'roots' that is returned in 'nodes'. Likewise,
2357 subset of 'roots' that is returned in 'nodes'. Likewise,
2358 'outheads' is the subset of 'heads' that is also in 'nodes'.
2358 'outheads' is the subset of 'heads' that is also in 'nodes'.
2359
2359
2360 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2360 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2361 unspecified, uses nullid as the only root. If 'heads' is
2361 unspecified, uses nullid as the only root. If 'heads' is
2362 unspecified, uses list of all of the revlog's heads."""
2362 unspecified, uses list of all of the revlog's heads."""
2363 nonodes = ([], [], [])
2363 nonodes = ([], [], [])
2364 if roots is not None:
2364 if roots is not None:
2365 roots = list(roots)
2365 roots = list(roots)
2366 if not roots:
2366 if not roots:
2367 return nonodes
2367 return nonodes
2368 lowestrev = min([self.rev(n) for n in roots])
2368 lowestrev = min([self.rev(n) for n in roots])
2369 else:
2369 else:
2370 roots = [self.nullid] # Everybody's a descendant of nullid
2370 roots = [self.nullid] # Everybody's a descendant of nullid
2371 lowestrev = nullrev
2371 lowestrev = nullrev
2372 if (lowestrev == nullrev) and (heads is None):
2372 if (lowestrev == nullrev) and (heads is None):
2373 # We want _all_ the nodes!
2373 # We want _all_ the nodes!
2374 return (
2374 return (
2375 [self.node(r) for r in self],
2375 [self.node(r) for r in self],
2376 [self.nullid],
2376 [self.nullid],
2377 list(self.heads()),
2377 list(self.heads()),
2378 )
2378 )
2379 if heads is None:
2379 if heads is None:
2380 # All nodes are ancestors, so the latest ancestor is the last
2380 # All nodes are ancestors, so the latest ancestor is the last
2381 # node.
2381 # node.
2382 highestrev = len(self) - 1
2382 highestrev = len(self) - 1
2383 # Set ancestors to None to signal that every node is an ancestor.
2383 # Set ancestors to None to signal that every node is an ancestor.
2384 ancestors = None
2384 ancestors = None
2385 # Set heads to an empty dictionary for later discovery of heads
2385 # Set heads to an empty dictionary for later discovery of heads
2386 heads = {}
2386 heads = {}
2387 else:
2387 else:
2388 heads = list(heads)
2388 heads = list(heads)
2389 if not heads:
2389 if not heads:
2390 return nonodes
2390 return nonodes
2391 ancestors = set()
2391 ancestors = set()
2392 # Turn heads into a dictionary so we can remove 'fake' heads.
2392 # Turn heads into a dictionary so we can remove 'fake' heads.
2393 # Also, later we will be using it to filter out the heads we can't
2393 # Also, later we will be using it to filter out the heads we can't
2394 # find from roots.
2394 # find from roots.
2395 heads = dict.fromkeys(heads, False)
2395 heads = dict.fromkeys(heads, False)
2396 # Start at the top and keep marking parents until we're done.
2396 # Start at the top and keep marking parents until we're done.
2397 nodestotag = set(heads)
2397 nodestotag = set(heads)
2398 # Remember where the top was so we can use it as a limit later.
2398 # Remember where the top was so we can use it as a limit later.
2399 highestrev = max([self.rev(n) for n in nodestotag])
2399 highestrev = max([self.rev(n) for n in nodestotag])
2400 while nodestotag:
2400 while nodestotag:
2401 # grab a node to tag
2401 # grab a node to tag
2402 n = nodestotag.pop()
2402 n = nodestotag.pop()
2403 # Never tag nullid
2403 # Never tag nullid
2404 if n == self.nullid:
2404 if n == self.nullid:
2405 continue
2405 continue
2406 # A node's revision number represents its place in a
2406 # A node's revision number represents its place in a
2407 # topologically sorted list of nodes.
2407 # topologically sorted list of nodes.
2408 r = self.rev(n)
2408 r = self.rev(n)
2409 if r >= lowestrev:
2409 if r >= lowestrev:
2410 if n not in ancestors:
2410 if n not in ancestors:
2411 # If we are possibly a descendant of one of the roots
2411 # If we are possibly a descendant of one of the roots
2412 # and we haven't already been marked as an ancestor
2412 # and we haven't already been marked as an ancestor
2413 ancestors.add(n) # Mark as ancestor
2413 ancestors.add(n) # Mark as ancestor
2414 # Add non-nullid parents to list of nodes to tag.
2414 # Add non-nullid parents to list of nodes to tag.
2415 nodestotag.update(
2415 nodestotag.update(
2416 [p for p in self.parents(n) if p != self.nullid]
2416 [p for p in self.parents(n) if p != self.nullid]
2417 )
2417 )
2418 elif n in heads: # We've seen it before, is it a fake head?
2418 elif n in heads: # We've seen it before, is it a fake head?
2419 # So it is; real heads should not be the ancestors of
2419 # So it is; real heads should not be the ancestors of
2420 # any other heads.
2420 # any other heads.
2421 heads.pop(n)
2421 heads.pop(n)
2422 if not ancestors:
2422 if not ancestors:
2423 return nonodes
2423 return nonodes
2424 # Now that we have our set of ancestors, we want to remove any
2424 # Now that we have our set of ancestors, we want to remove any
2425 # roots that are not ancestors.
2425 # roots that are not ancestors.
2426
2426
2427 # If one of the roots was nullid, everything is included anyway.
2427 # If one of the roots was nullid, everything is included anyway.
2428 if lowestrev > nullrev:
2428 if lowestrev > nullrev:
2429 # But, since we weren't, let's recompute the lowest rev to not
2429 # But, since we weren't, let's recompute the lowest rev to not
2430 # include roots that aren't ancestors.
2430 # include roots that aren't ancestors.
2431
2431
2432 # Filter out roots that aren't ancestors of heads
2432 # Filter out roots that aren't ancestors of heads
2433 roots = [root for root in roots if root in ancestors]
2433 roots = [root for root in roots if root in ancestors]
2434 # Recompute the lowest revision
2434 # Recompute the lowest revision
2435 if roots:
2435 if roots:
2436 lowestrev = min([self.rev(root) for root in roots])
2436 lowestrev = min([self.rev(root) for root in roots])
2437 else:
2437 else:
2438 # No more roots? Return empty list
2438 # No more roots? Return empty list
2439 return nonodes
2439 return nonodes
2440 else:
2440 else:
2441 # We are descending from nullid, and don't need to care about
2441 # We are descending from nullid, and don't need to care about
2442 # any other roots.
2442 # any other roots.
2443 lowestrev = nullrev
2443 lowestrev = nullrev
2444 roots = [self.nullid]
2444 roots = [self.nullid]
2445 # Transform our roots list into a set.
2445 # Transform our roots list into a set.
2446 descendants = set(roots)
2446 descendants = set(roots)
2447 # Also, keep the original roots so we can filter out roots that aren't
2447 # Also, keep the original roots so we can filter out roots that aren't
2448 # 'real' roots (i.e. are descended from other roots).
2448 # 'real' roots (i.e. are descended from other roots).
2449 roots = descendants.copy()
2449 roots = descendants.copy()
2450 # Our topologically sorted list of output nodes.
2450 # Our topologically sorted list of output nodes.
2451 orderedout = []
2451 orderedout = []
2452 # Don't start at nullid since we don't want nullid in our output list,
2452 # Don't start at nullid since we don't want nullid in our output list,
2453 # and if nullid shows up in descendants, empty parents will look like
2453 # and if nullid shows up in descendants, empty parents will look like
2454 # they're descendants.
2454 # they're descendants.
2455 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2455 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2456 n = self.node(r)
2456 n = self.node(r)
2457 isdescendant = False
2457 isdescendant = False
2458 if lowestrev == nullrev: # Everybody is a descendant of nullid
2458 if lowestrev == nullrev: # Everybody is a descendant of nullid
2459 isdescendant = True
2459 isdescendant = True
2460 elif n in descendants:
2460 elif n in descendants:
2461 # n is already a descendant
2461 # n is already a descendant
2462 isdescendant = True
2462 isdescendant = True
2463 # This check only needs to be done here because all the roots
2463 # This check only needs to be done here because all the roots
2464 # will start being marked as descendants before the loop.
2464 # will start being marked as descendants before the loop.
2465 if n in roots:
2465 if n in roots:
2466 # If n was a root, check if it's a 'real' root.
2466 # If n was a root, check if it's a 'real' root.
2467 p = tuple(self.parents(n))
2467 p = tuple(self.parents(n))
2468 # If any of its parents are descendants, it's not a root.
2468 # If any of its parents are descendants, it's not a root.
2469 if (p[0] in descendants) or (p[1] in descendants):
2469 if (p[0] in descendants) or (p[1] in descendants):
2470 roots.remove(n)
2470 roots.remove(n)
2471 else:
2471 else:
2472 p = tuple(self.parents(n))
2472 p = tuple(self.parents(n))
2473 # A node is a descendant if either of its parents are
2473 # A node is a descendant if either of its parents are
2474 # descendants. (We seeded the descendants set with the roots
2474 # descendants. (We seeded the descendants set with the roots
2475 # up there, remember?)
2475 # up there, remember?)
2476 if (p[0] in descendants) or (p[1] in descendants):
2476 if (p[0] in descendants) or (p[1] in descendants):
2477 descendants.add(n)
2477 descendants.add(n)
2478 isdescendant = True
2478 isdescendant = True
2479 if isdescendant and ((ancestors is None) or (n in ancestors)):
2479 if isdescendant and ((ancestors is None) or (n in ancestors)):
2480 # Only include nodes that are both descendants and ancestors.
2480 # Only include nodes that are both descendants and ancestors.
2481 orderedout.append(n)
2481 orderedout.append(n)
2482 if (ancestors is not None) and (n in heads):
2482 if (ancestors is not None) and (n in heads):
2483 # We're trying to figure out which heads are reachable
2483 # We're trying to figure out which heads are reachable
2484 # from roots.
2484 # from roots.
2485 # Mark this head as having been reached
2485 # Mark this head as having been reached
2486 heads[n] = True
2486 heads[n] = True
2487 elif ancestors is None:
2487 elif ancestors is None:
2488 # Otherwise, we're trying to discover the heads.
2488 # Otherwise, we're trying to discover the heads.
2489 # Assume this is a head because if it isn't, the next step
2489 # Assume this is a head because if it isn't, the next step
2490 # will eventually remove it.
2490 # will eventually remove it.
2491 heads[n] = True
2491 heads[n] = True
2492 # But, obviously its parents aren't.
2492 # But, obviously its parents aren't.
2493 for p in self.parents(n):
2493 for p in self.parents(n):
2494 heads.pop(p, None)
2494 heads.pop(p, None)
2495 heads = [head for head, flag in heads.items() if flag]
2495 heads = [head for head, flag in heads.items() if flag]
2496 roots = list(roots)
2496 roots = list(roots)
2497 assert orderedout
2497 assert orderedout
2498 assert roots
2498 assert roots
2499 assert heads
2499 assert heads
2500 return (orderedout, roots, heads)
2500 return (orderedout, roots, heads)
2501
2501
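# Illustrative sketch (hypothetical ``rl`` revlog whose node ids are written
# n0..n3 for a linear history n0 <- n1 <- n2 <- n3):
#
#   rl.nodesbetween(roots=[n1], heads=[n3])
#   -> ([n1, n2, n3], [n1], [n3])   # nodes, reachable roots, reachable heads
#
#   rl.nodesbetween(roots=[n2], heads=[n1])
#   -> ([], [], [])                 # n1 is not a descendant of n2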
2502 def headrevs(self, revs=None, stop_rev=None):
2502 def headrevs(self, revs=None, stop_rev=None):
2503 if revs is None:
2503 if revs is None:
2504 return self.index.headrevs(None, stop_rev)
2504 return self.index.headrevs(None, stop_rev)
2505 if rustdagop is not None and self.index.rust_ext_compat:
2505 if rustdagop is not None and self.index.rust_ext_compat:
2506 return rustdagop.headrevs(self.index, revs)
2506 return rustdagop.headrevs(self.index, revs)
2507 return dagop.headrevs(revs, self._uncheckedparentrevs)
2507 return dagop.headrevs(revs, self._uncheckedparentrevs)
2508
2508
2509 def headrevsdiff(self, start, stop):
2509 def headrevsdiff(self, start, stop):
2510 try:
2510 try:
2511 return self.index.headrevsdiff(
2511 return self.index.headrevsdiff(
2512 start, stop
2512 start, stop
2513 ) # pytype: disable=attribute-error
2513 ) # pytype: disable=attribute-error
2514 except AttributeError:
2514 except AttributeError:
2515 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2515 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2516
2516
2517 def computephases(self, roots):
2517 def computephases(self, roots):
2518 return self.index.computephasesmapsets(
2518 return self.index.computephasesmapsets(
2519 roots
2519 roots
2520 ) # pytype: disable=attribute-error
2520 ) # pytype: disable=attribute-error
2521
2521
2522 def _head_node_ids(self):
2522 def _head_node_ids(self):
2523 try:
2523 try:
2524 return self.index.head_node_ids() # pytype: disable=attribute-error
2524 return self.index.head_node_ids() # pytype: disable=attribute-error
2525 except AttributeError:
2525 except AttributeError:
2526 return [self.node(r) for r in self.headrevs()]
2526 return [self.node(r) for r in self.headrevs()]
2527
2527
2528 def heads(self, start=None, stop=None):
2528 def heads(self, start=None, stop=None):
2529 """return the list of all nodes that have no children
2529 """return the list of all nodes that have no children
2530
2530
2531 if start is specified, only heads that are descendants of
2531 if start is specified, only heads that are descendants of
2532 start will be returned
2532 start will be returned
2533 if stop is specified, it will consider all the revs from stop
2533 if stop is specified, it will consider all the revs from stop
2534 as if they had no children
2534 as if they had no children
2535 """
2535 """
2536 if start is None and stop is None:
2536 if start is None and stop is None:
2537 if not len(self):
2537 if not len(self):
2538 return [self.nullid]
2538 return [self.nullid]
2539 return self._head_node_ids()
2539 return self._head_node_ids()
2540 if start is None:
2540 if start is None:
2541 start = nullrev
2541 start = nullrev
2542 else:
2542 else:
2543 start = self.rev(start)
2543 start = self.rev(start)
2544
2544
2545 stoprevs = {self.rev(n) for n in stop or []}
2545 stoprevs = {self.rev(n) for n in stop or []}
2546
2546
2547 revs = dagop.headrevssubset(
2547 revs = dagop.headrevssubset(
2548 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2548 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2549 )
2549 )
2550
2550
2551 return [self.node(rev) for rev in revs]
2551 return [self.node(rev) for rev in revs]
2552
2552
2553 def diffheads(self, start, stop):
2553 def diffheads(self, start, stop):
2554 """return the nodes that make up the difference between
2554 """return the nodes that make up the difference between
2555 heads of revs before `start` and heads of revs before `stop`"""
2555 heads of revs before `start` and heads of revs before `stop`"""
2556 removed, added = self.headrevsdiff(start, stop)
2556 removed, added = self.headrevsdiff(start, stop)
2557 return [self.node(r) for r in removed], [self.node(r) for r in added]
2557 return [self.node(r) for r in removed], [self.node(r) for r in added]
2558
2558
2559 def children(self, node):
2559 def children(self, node):
2560 """find the children of a given node"""
2560 """find the children of a given node"""
2561 c = []
2561 c = []
2562 p = self.rev(node)
2562 p = self.rev(node)
2563 for r in self.revs(start=p + 1):
2563 for r in self.revs(start=p + 1):
2564 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2564 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2565 if prevs:
2565 if prevs:
2566 for pr in prevs:
2566 for pr in prevs:
2567 if pr == p:
2567 if pr == p:
2568 c.append(self.node(r))
2568 c.append(self.node(r))
2569 elif p == nullrev:
2569 elif p == nullrev:
2570 c.append(self.node(r))
2570 c.append(self.node(r))
2571 return c
2571 return c
2572
2572
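# Illustrative sketch, not part of this module: the forward scan children()
# performs above, restated over a hypothetical ``parentrevs`` callable and a
# tip revision number.
def toy_children(parentrevs, tiprev, p, nullrev=-1):
    """Revs in (p, tiprev] that list ``p`` among their (non-null) parents."""
    out = []
    for r in range(p + 1, tiprev + 1):
        prevs = [pr for pr in parentrevs(r) if pr != nullrev]
        if p in prevs or (not prevs and p == nullrev):
            out.append(r)
    return out

# toy_children(lambda r: {1: (0, -1), 2: (0, -1), 3: (1, 2)}[r], 3, 0) == [1, 2]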
2573 def commonancestorsheads(self, a, b):
2573 def commonancestorsheads(self, a, b):
2574 """calculate all the heads of the common ancestors of nodes a and b"""
2574 """calculate all the heads of the common ancestors of nodes a and b"""
2575 a, b = self.rev(a), self.rev(b)
2575 a, b = self.rev(a), self.rev(b)
2576 ancs = self._commonancestorsheads(a, b)
2576 ancs = self._commonancestorsheads(a, b)
2577 return pycompat.maplist(self.node, ancs)
2577 return pycompat.maplist(self.node, ancs)
2578
2578
2579 def _commonancestorsheads(self, *revs):
2579 def _commonancestorsheads(self, *revs):
2580 """calculate all the heads of the common ancestors of revs"""
2580 """calculate all the heads of the common ancestors of revs"""
2581 try:
2581 try:
2582 ancs = self.index.commonancestorsheads(
2582 ancs = self.index.commonancestorsheads(
2583 *revs
2583 *revs
2584 ) # pytype: disable=attribute-error
2584 ) # pytype: disable=attribute-error
2585 except (AttributeError, OverflowError): # C implementation failed
2585 except (AttributeError, OverflowError): # C implementation failed
2586 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2586 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2587 return ancs
2587 return ancs
2588
2588
2589 def isancestor(self, a, b):
2589 def isancestor(self, a, b):
2590 """return True if node a is an ancestor of node b
2590 """return True if node a is an ancestor of node b
2591
2591
2592 A revision is considered an ancestor of itself."""
2592 A revision is considered an ancestor of itself."""
2593 a, b = self.rev(a), self.rev(b)
2593 a, b = self.rev(a), self.rev(b)
2594 return self.isancestorrev(a, b)
2594 return self.isancestorrev(a, b)
2595
2595
2596 def isancestorrev(self, a, b):
2596 def isancestorrev(self, a, b):
2597 """return True if revision a is an ancestor of revision b
2597 """return True if revision a is an ancestor of revision b
2598
2598
2599 A revision is considered an ancestor of itself.
2599 A revision is considered an ancestor of itself.
2600
2600
2601 The implementation of this is trivial but the use of
2601 The implementation of this is trivial but the use of
2602 reachableroots is not."""
2602 reachableroots is not."""
2603 if a == nullrev:
2603 if a == nullrev:
2604 return True
2604 return True
2605 elif a == b:
2605 elif a == b:
2606 return True
2606 return True
2607 elif a > b:
2607 elif a > b:
2608 return False
2608 return False
2609 return bool(self.reachableroots(a, [b], [a], includepath=False))
2609 return bool(self.reachableroots(a, [b], [a], includepath=False))
2610
2610
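# Illustrative sketch, not part of this module: what the reachableroots call
# above decides, restated as a plain walk over a hypothetical ``parents``
# mapping (rev -> tuple of parent revs, -1 for null).
def toy_isancestorrev(parents, a, b):
    if a == -1 or a == b:
        return True
    if a > b:
        return False          # an ancestor always has a smaller revision number
    stack = [b]
    seen = set()
    while stack:
        r = stack.pop()
        if r == a:
            return True
        if r in seen or r < a:
            continue          # revs below ``a`` cannot lead back to it
        seen.add(r)
        stack.extend(p for p in parents[r] if p != -1)
    return False

# toy_isancestorrev({0: (-1,), 1: (0,), 2: (0,), 3: (1, 2)}, 0, 3) is True
# toy_isancestorrev({0: (-1,), 1: (0,), 2: (0,), 3: (1, 2)}, 1, 2) is False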
2611 def reachableroots(self, minroot, heads, roots, includepath=False):
2611 def reachableroots(self, minroot, heads, roots, includepath=False):
2612 """return (heads(::(<roots> and <roots>::<heads>)))
2612 """return (heads(::(<roots> and <roots>::<heads>)))
2613
2613
2614 If includepath is True, return (<roots>::<heads>)."""
2614 If includepath is True, return (<roots>::<heads>)."""
2615 try:
2615 try:
2616 return self.index.reachableroots2(
2616 return self.index.reachableroots2(
2617 minroot, heads, roots, includepath
2617 minroot, heads, roots, includepath
2618 ) # pytype: disable=attribute-error
2618 ) # pytype: disable=attribute-error
2619 except AttributeError:
2619 except AttributeError:
2620 return dagop._reachablerootspure(
2620 return dagop._reachablerootspure(
2621 self.parentrevs, minroot, roots, heads, includepath
2621 self.parentrevs, minroot, roots, heads, includepath
2622 )
2622 )
2623
2623
2624 def ancestor(self, a, b):
2624 def ancestor(self, a, b):
2625 """calculate the "best" common ancestor of nodes a and b"""
2625 """calculate the "best" common ancestor of nodes a and b"""
2626
2626
2627 a, b = self.rev(a), self.rev(b)
2627 a, b = self.rev(a), self.rev(b)
2628 try:
2628 try:
2629 ancs = self.index.ancestors(a, b) # pytype: disable=attribute-error
2629 ancs = self.index.ancestors(a, b) # pytype: disable=attribute-error
2630 except (AttributeError, OverflowError):
2630 except (AttributeError, OverflowError):
2631 ancs = ancestor.ancestors(self.parentrevs, a, b)
2631 ancs = ancestor.ancestors(self.parentrevs, a, b)
2632 if ancs:
2632 if ancs:
2633 # choose a consistent winner when there's a tie
2633 # choose a consistent winner when there's a tie
2634 return min(map(self.node, ancs))
2634 return min(map(self.node, ancs))
2635 return self.nullid
2635 return self.nullid
2636
2636
2637 def _match(self, id):
2637 def _match(self, id):
2638 if isinstance(id, int):
2638 if isinstance(id, int):
2639 # rev
2639 # rev
2640 return self.node(id)
2640 return self.node(id)
2641 if len(id) == self.nodeconstants.nodelen:
2641 if len(id) == self.nodeconstants.nodelen:
2642 # possibly a binary node
2642 # possibly a binary node
2643 # odds of a binary node being all hex in ASCII are 1 in 10**25
2643 # odds of a binary node being all hex in ASCII are 1 in 10**25
2644 try:
2644 try:
2645 node = id
2645 node = id
2646 self.rev(node) # quick search the index
2646 self.rev(node) # quick search the index
2647 return node
2647 return node
2648 except error.LookupError:
2648 except error.LookupError:
2649 pass # may be partial hex id
2649 pass # may be partial hex id
2650 try:
2650 try:
2651 # str(rev)
2651 # str(rev)
2652 rev = int(id)
2652 rev = int(id)
2653 if b"%d" % rev != id:
2653 if b"%d" % rev != id:
2654 raise ValueError
2654 raise ValueError
2655 if rev < 0:
2655 if rev < 0:
2656 rev = len(self) + rev
2656 rev = len(self) + rev
2657 if rev < 0 or rev >= len(self):
2657 if rev < 0 or rev >= len(self):
2658 raise ValueError
2658 raise ValueError
2659 return self.node(rev)
2659 return self.node(rev)
2660 except (ValueError, OverflowError):
2660 except (ValueError, OverflowError):
2661 pass
2661 pass
2662 if len(id) == 2 * self.nodeconstants.nodelen:
2662 if len(id) == 2 * self.nodeconstants.nodelen:
2663 try:
2663 try:
2664 # a full hex nodeid?
2664 # a full hex nodeid?
2665 node = bin(id)
2665 node = bin(id)
2666 self.rev(node)
2666 self.rev(node)
2667 return node
2667 return node
2668 except (binascii.Error, error.LookupError):
2668 except (binascii.Error, error.LookupError):
2669 pass
2669 pass
2670
2670
2671 def _partialmatch(self, id):
2671 def _partialmatch(self, id):
2672 # we don't care about wdirfilenodeids as they should always be full hashes
2672 # we don't care about wdirfilenodeids as they should always be full hashes
2673 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2673 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2674 ambiguous = False
2674 ambiguous = False
2675 try:
2675 try:
2676 partial = self.index.partialmatch(
2676 partial = self.index.partialmatch(
2677 id
2677 id
2678 ) # pytype: disable=attribute-error
2678 ) # pytype: disable=attribute-error
2679 if partial and self.hasnode(partial):
2679 if partial and self.hasnode(partial):
2680 if maybewdir:
2680 if maybewdir:
2681 # single 'ff...' match in radix tree, ambiguous with wdir
2681 # single 'ff...' match in radix tree, ambiguous with wdir
2682 ambiguous = True
2682 ambiguous = True
2683 else:
2683 else:
2684 return partial
2684 return partial
2685 elif maybewdir:
2685 elif maybewdir:
2686 # no 'ff...' match in radix tree, wdir identified
2686 # no 'ff...' match in radix tree, wdir identified
2687 raise error.WdirUnsupported
2687 raise error.WdirUnsupported
2688 else:
2688 else:
2689 return None
2689 return None
2690 except error.RevlogError:
2690 except error.RevlogError:
2691 # parsers.c radix tree lookup gave multiple matches
2691 # parsers.c radix tree lookup gave multiple matches
2692 # fast path: for unfiltered changelog, radix tree is accurate
2692 # fast path: for unfiltered changelog, radix tree is accurate
2693 if not getattr(self, 'filteredrevs', None):
2693 if not getattr(self, 'filteredrevs', None):
2694 ambiguous = True
2694 ambiguous = True
2695 # fall through to slow path that filters hidden revisions
2695 # fall through to slow path that filters hidden revisions
2696 except (AttributeError, ValueError):
2696 except (AttributeError, ValueError):
2697 # we are pure python, or key is not hex
2697 # we are pure python, or key is not hex
2698 pass
2698 pass
2699 if ambiguous:
2699 if ambiguous:
2700 raise error.AmbiguousPrefixLookupError(
2700 raise error.AmbiguousPrefixLookupError(
2701 id, self.display_id, _(b'ambiguous identifier')
2701 id, self.display_id, _(b'ambiguous identifier')
2702 )
2702 )
2703
2703
2704 if id in self._pcache:
2704 if id in self._pcache:
2705 return self._pcache[id]
2705 return self._pcache[id]
2706
2706
2707 if len(id) <= 40:
2707 if len(id) <= 40:
2708 # hex(node)[:...]
2708 # hex(node)[:...]
2709 l = len(id) // 2 * 2 # grab an even number of digits
2709 l = len(id) // 2 * 2 # grab an even number of digits
2710 try:
2710 try:
2711 # we're dropping the last digit, so let's check that it's hex,
2711 # we're dropping the last digit, so let's check that it's hex,
2712 # to avoid the expensive computation below if it's not
2712 # to avoid the expensive computation below if it's not
2713 if len(id) % 2 > 0:
2713 if len(id) % 2 > 0:
2714 if not (id[-1] in hexdigits):
2714 if not (id[-1] in hexdigits):
2715 return None
2715 return None
2716 prefix = bin(id[:l])
2716 prefix = bin(id[:l])
2717 except binascii.Error:
2717 except binascii.Error:
2718 pass
2718 pass
2719 else:
2719 else:
2720 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2720 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2721 nl = [
2721 nl = [
2722 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2722 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2723 ]
2723 ]
2724 if self.nodeconstants.nullhex.startswith(id):
2724 if self.nodeconstants.nullhex.startswith(id):
2725 nl.append(self.nullid)
2725 nl.append(self.nullid)
2726 if len(nl) > 0:
2726 if len(nl) > 0:
2727 if len(nl) == 1 and not maybewdir:
2727 if len(nl) == 1 and not maybewdir:
2728 self._pcache[id] = nl[0]
2728 self._pcache[id] = nl[0]
2729 return nl[0]
2729 return nl[0]
2730 raise error.AmbiguousPrefixLookupError(
2730 raise error.AmbiguousPrefixLookupError(
2731 id, self.display_id, _(b'ambiguous identifier')
2731 id, self.display_id, _(b'ambiguous identifier')
2732 )
2732 )
2733 if maybewdir:
2733 if maybewdir:
2734 raise error.WdirUnsupported
2734 raise error.WdirUnsupported
2735 return None
2735 return None
2736
2736
2737 def lookup(self, id):
2737 def lookup(self, id):
2738 """locate a node based on:
2738 """locate a node based on:
2739 - revision number or str(revision number)
2739 - revision number or str(revision number)
2740 - nodeid or subset of hex nodeid
2740 - nodeid or subset of hex nodeid
2741 """
2741 """
2742 n = self._match(id)
2742 n = self._match(id)
2743 if n is not None:
2743 if n is not None:
2744 return n
2744 return n
2745 n = self._partialmatch(id)
2745 n = self._partialmatch(id)
2746 if n:
2746 if n:
2747 return n
2747 return n
2748
2748
2749 raise error.LookupError(id, self.display_id, _(b'no match found'))
2749 raise error.LookupError(id, self.display_id, _(b'no match found'))
2750
2750
2751 def shortest(self, node, minlength=1):
2751 def shortest(self, node, minlength=1):
2752 """Find the shortest unambiguous prefix that matches node."""
2752 """Find the shortest unambiguous prefix that matches node."""
2753
2753
2754 def isvalid(prefix):
2754 def isvalid(prefix):
2755 try:
2755 try:
2756 matchednode = self._partialmatch(prefix)
2756 matchednode = self._partialmatch(prefix)
2757 except error.AmbiguousPrefixLookupError:
2757 except error.AmbiguousPrefixLookupError:
2758 return False
2758 return False
2759 except error.WdirUnsupported:
2759 except error.WdirUnsupported:
2760 # single 'ff...' match
2760 # single 'ff...' match
2761 return True
2761 return True
2762 if matchednode is None:
2762 if matchednode is None:
2763 raise error.LookupError(node, self.display_id, _(b'no node'))
2763 raise error.LookupError(node, self.display_id, _(b'no node'))
2764 return True
2764 return True
2765
2765
2766 def maybewdir(prefix):
2766 def maybewdir(prefix):
2767 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2767 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2768
2768
2769 hexnode = hex(node)
2769 hexnode = hex(node)
2770
2770
2771 def disambiguate(hexnode, minlength):
2771 def disambiguate(hexnode, minlength):
2772 """Disambiguate against wdirid."""
2772 """Disambiguate against wdirid."""
2773 for length in range(minlength, len(hexnode) + 1):
2773 for length in range(minlength, len(hexnode) + 1):
2774 prefix = hexnode[:length]
2774 prefix = hexnode[:length]
2775 if not maybewdir(prefix):
2775 if not maybewdir(prefix):
2776 return prefix
2776 return prefix
2777
2777
2778 if not getattr(self, 'filteredrevs', None):
2778 if not getattr(self, 'filteredrevs', None):
2779 try:
2779 try:
2780 shortest = self.index.shortest(
2780 shortest = self.index.shortest(
2781 node
2781 node
2782 ) # pytype: disable=attribute-error
2782 ) # pytype: disable=attribute-error
2783 length = max(shortest, minlength)
2783 length = max(shortest, minlength)
2784 return disambiguate(hexnode, length)
2784 return disambiguate(hexnode, length)
2785 except error.RevlogError:
2785 except error.RevlogError:
2786 if node != self.nodeconstants.wdirid:
2786 if node != self.nodeconstants.wdirid:
2787 raise error.LookupError(
2787 raise error.LookupError(
2788 node, self.display_id, _(b'no node')
2788 node, self.display_id, _(b'no node')
2789 )
2789 )
2790 except AttributeError:
2790 except AttributeError:
2791 # Fall through to pure code
2791 # Fall through to pure code
2792 pass
2792 pass
2793
2793
2794 if node == self.nodeconstants.wdirid:
2794 if node == self.nodeconstants.wdirid:
2795 for length in range(minlength, len(hexnode) + 1):
2795 for length in range(minlength, len(hexnode) + 1):
2796 prefix = hexnode[:length]
2796 prefix = hexnode[:length]
2797 if isvalid(prefix):
2797 if isvalid(prefix):
2798 return prefix
2798 return prefix
2799
2799
2800 for length in range(minlength, len(hexnode) + 1):
2800 for length in range(minlength, len(hexnode) + 1):
2801 prefix = hexnode[:length]
2801 prefix = hexnode[:length]
2802 if isvalid(prefix):
2802 if isvalid(prefix):
2803 return disambiguate(hexnode, length)
2803 return disambiguate(hexnode, length)
2804
2804
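# Illustrative sketch, not part of this module: the pure-Python fallback above
# boils down to "grow the prefix until it matches exactly one known node",
# restated over a hypothetical list of hex node ids (the wdir special case
# handled above is ignored here).
def toy_shortest(hexnodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(1 for n in hexnodes if n.startswith(prefix)) == 1:
            return prefix
    return hexnode

# toy_shortest(['ab12', 'ab34', 'cd56'], 'ab34') == 'ab3'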
2805 def cmp(self, node, text):
2805 def cmp(self, node, text):
2806 """compare text with a given file revision
2806 """compare text with a given file revision
2807
2807
2808 returns True if text is different than what is stored.
2808 returns True if text is different than what is stored.
2809 """
2809 """
2810 p1, p2 = self.parents(node)
2810 p1, p2 = self.parents(node)
2811 return storageutil.hashrevisionsha1(text, p1, p2) != node
2811 return storageutil.hashrevisionsha1(text, p1, p2) != node
2812
2812
2813 def deltaparent(self, rev):
2813 def deltaparent(self, rev):
2814 """return deltaparent of the given revision"""
2814 """return deltaparent of the given revision"""
2815 base = self.index[rev][3]
2815 base = self.index[rev][3]
2816 if base == rev:
2816 if base == rev:
2817 return nullrev
2817 return nullrev
2818 elif self.delta_config.general_delta:
2818 elif self.delta_config.general_delta:
2819 return base
2819 return base
2820 else:
2820 else:
2821 return rev - 1
2821 return rev - 1
2822
2822
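# Illustrative sketch, not part of this module: the three cases above.
def toy_deltaparent(storedbase, rev, general_delta, nullrev=-1):
    if storedbase == rev:
        return nullrev                      # full snapshot, no delta parent
    return storedbase if general_delta else rev - 1

# a rev stored as a snapshot: toy_deltaparent(5, 5, True) == -1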
2823 def issnapshot(self, rev):
2823 def issnapshot(self, rev):
2824 """tells whether rev is a snapshot"""
2824 """tells whether rev is a snapshot"""
2825 ret = self._inner.issnapshot(rev)
2825 ret = self._inner.issnapshot(rev)
2826 self.issnapshot = self._inner.issnapshot
2826 self.issnapshot = self._inner.issnapshot
2827 return ret
2827 return ret
2828
2828
2829 def snapshotdepth(self, rev):
2829 def snapshotdepth(self, rev):
2830 """number of snapshot in the chain before this one"""
2830 """number of snapshot in the chain before this one"""
2831 if not self.issnapshot(rev):
2831 if not self.issnapshot(rev):
2832 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2832 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2833 return len(self._inner._deltachain(rev)[0]) - 1
2833 return len(self._inner._deltachain(rev)[0]) - 1
2834
2834
2835 def revdiff(self, rev1, rev2):
2835 def revdiff(self, rev1, rev2):
2836 """return or calculate a delta between two revisions
2836 """return or calculate a delta between two revisions
2837
2837
2838 The delta calculated is in binary form and is intended to be written to
2838 The delta calculated is in binary form and is intended to be written to
2839 revlog data directly. So this function needs raw revision data.
2839 revlog data directly. So this function needs raw revision data.
2840 """
2840 """
2841 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2841 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2842 return bytes(self._inner._chunk(rev2))
2842 return bytes(self._inner._chunk(rev2))
2843
2843
2844 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2844 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2845
2845
2846 def revision(self, nodeorrev):
2846 def revision(self, nodeorrev):
2847 """return an uncompressed revision of a given node or revision
2847 """return an uncompressed revision of a given node or revision
2848 number.
2848 number.
2849 """
2849 """
2850 return self._revisiondata(nodeorrev)
2850 return self._revisiondata(nodeorrev)
2851
2851
2852 def sidedata(self, nodeorrev):
2852 def sidedata(self, nodeorrev):
2853 """a map of extra data related to the changeset but not part of the hash
2853 """a map of extra data related to the changeset but not part of the hash
2854
2854
2855 This function currently returns a dictionary. However, a more advanced
2855 This function currently returns a dictionary. However, a more advanced
2856 mapping object will likely be used in the future for more
2856 mapping object will likely be used in the future for more
2857 efficient/lazy code.
2857 efficient/lazy code.
2858 """
2858 """
2859 # deal with <nodeorrev> argument type
2859 # deal with <nodeorrev> argument type
2860 if isinstance(nodeorrev, int):
2860 if isinstance(nodeorrev, int):
2861 rev = nodeorrev
2861 rev = nodeorrev
2862 else:
2862 else:
2863 rev = self.rev(nodeorrev)
2863 rev = self.rev(nodeorrev)
2864 return self._sidedata(rev)
2864 return self._sidedata(rev)
2865
2865
2866 def _rawtext(self, node, rev):
2866 def _rawtext(self, node, rev):
2867 """return the possibly unvalidated rawtext for a revision
2867 """return the possibly unvalidated rawtext for a revision
2868
2868
2869 returns (rev, rawtext, validated)
2869 returns (rev, rawtext, validated)
2870 """
2870 """
2871 # Check if we have the entry in cache
2871 # Check if we have the entry in cache
2872 # The cache entry looks like (node, rev, rawtext)
2872 # The cache entry looks like (node, rev, rawtext)
2873 if self._inner._revisioncache:
2873 if self._inner._revisioncache:
2874 if self._inner._revisioncache[0] == node:
2874 if self._inner._revisioncache[0] == node:
2875 return (rev, self._inner._revisioncache[2], True)
2875 return (rev, self._inner._revisioncache[2], True)
2876
2876
2877 if rev is None:
2877 if rev is None:
2878 rev = self.rev(node)
2878 rev = self.rev(node)
2879
2879
2880 text = self._inner.raw_text(node, rev)
2880 text = self._inner.raw_text(node, rev)
2881 return (rev, text, False)
2881 return (rev, text, False)
2882
2882
2883 def _revisiondata(self, nodeorrev, raw=False):
2883 def _revisiondata(self, nodeorrev, raw=False):
2884 # deal with <nodeorrev> argument type
2884 # deal with <nodeorrev> argument type
2885 if isinstance(nodeorrev, int):
2885 if isinstance(nodeorrev, int):
2886 rev = nodeorrev
2886 rev = nodeorrev
2887 node = self.node(rev)
2887 node = self.node(rev)
2888 else:
2888 else:
2889 node = nodeorrev
2889 node = nodeorrev
2890 rev = None
2890 rev = None
2891
2891
2892 # fast path the special `nullid` rev
2892 # fast path the special `nullid` rev
2893 if node == self.nullid:
2893 if node == self.nullid:
2894 return b""
2894 return b""
2895
2895
2896 # ``rawtext`` is the text as stored inside the revlog. Might be the
2896 # ``rawtext`` is the text as stored inside the revlog. Might be the
2897 # revision or might need to be processed to retrieve the revision.
2897 # revision or might need to be processed to retrieve the revision.
2898 rev, rawtext, validated = self._rawtext(node, rev)
2898 rev, rawtext, validated = self._rawtext(node, rev)
2899
2899
2900 if raw and validated:
2900 if raw and validated:
2901 # if we don't need to process the raw text and the raw
2901 # if we don't need to process the raw text and the raw
2902 # text is already cached, we can exit early.
2902 # text is already cached, we can exit early.
2903 return rawtext
2903 return rawtext
2904 if rev is None:
2904 if rev is None:
2905 rev = self.rev(node)
2905 rev = self.rev(node)
2906 # the revlog's flags for this revision
2906 # the revlog's flags for this revision
2907 # (they usually alter its state or content)
2907 # (they usually alter its state or content)
2908 flags = self.flags(rev)
2908 flags = self.flags(rev)
2909
2909
2910 if validated and flags == REVIDX_DEFAULT_FLAGS:
2910 if validated and flags == REVIDX_DEFAULT_FLAGS:
2911 # no extra flags set, no flag processor runs, text = rawtext
2911 # no extra flags set, no flag processor runs, text = rawtext
2912 return rawtext
2912 return rawtext
2913
2913
2914 if raw:
2914 if raw:
2915 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2915 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2916 text = rawtext
2916 text = rawtext
2917 else:
2917 else:
2918 r = flagutil.processflagsread(self, rawtext, flags)
2918 r = flagutil.processflagsread(self, rawtext, flags)
2919 text, validatehash = r
2919 text, validatehash = r
2920 if validatehash:
2920 if validatehash:
2921 self.checkhash(text, node, rev=rev)
2921 self.checkhash(text, node, rev=rev)
2922 if not validated:
2922 if not validated:
2923 self._inner._revisioncache = (node, rev, rawtext)
2923 self._inner._revisioncache = (node, rev, rawtext)
2924
2924
2925 return text
2925 return text
2926
2926
2927 def _sidedata(self, rev):
2927 def _sidedata(self, rev):
2928 """Return the sidedata for a given revision number."""
2928 """Return the sidedata for a given revision number."""
2929 if self._sidedatafile is None:
2929 if self._sidedatafile is None:
2930 return {}
2930 return {}
2931 sidedata_end = None
2931 sidedata_end = None
2932 if self._docket is not None:
2932 if self._docket is not None:
2933 sidedata_end = self._docket.sidedata_end
2933 sidedata_end = self._docket.sidedata_end
2934 return self._inner.sidedata(rev, sidedata_end)
2934 return self._inner.sidedata(rev, sidedata_end)
2935
2935
2936 def rawdata(self, nodeorrev):
2936 def rawdata(self, nodeorrev):
2937 """return an uncompressed raw data of a given node or revision number."""
2937 """return an uncompressed raw data of a given node or revision number."""
2938 return self._revisiondata(nodeorrev, raw=True)
2938 return self._revisiondata(nodeorrev, raw=True)
2939
2939
2940 def hash(self, text, p1, p2):
2940 def hash(self, text, p1, p2):
2941 """Compute a node hash.
2941 """Compute a node hash.
2942
2942
2943 Available as a function so that subclasses can replace the hash
2943 Available as a function so that subclasses can replace the hash
2944 as needed.
2944 as needed.
2945 """
2945 """
2946 return storageutil.hashrevisionsha1(text, p1, p2)
2946 return storageutil.hashrevisionsha1(text, p1, p2)
2947
2947
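# Illustrative sketch, not part of this module: the default SHA-1 node hash
# that storageutil.hashrevisionsha1 is expected to compute, i.e. the digest of
# the two parent nodes in sorted order followed by the revision text.
import hashlib

def toy_hashrevision_sha1(text, p1, p2):
    a, b = sorted((p1, p2))
    return hashlib.sha1(a + b + text).digest()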
2948 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2948 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2949 """Check node hash integrity.
2949 """Check node hash integrity.
2950
2950
2951 Available as a function so that subclasses can extend hash mismatch
2951 Available as a function so that subclasses can extend hash mismatch
2952 behaviors as needed.
2952 behaviors as needed.
2953 """
2953 """
2954 try:
2954 try:
2955 if p1 is None and p2 is None:
2955 if p1 is None and p2 is None:
2956 p1, p2 = self.parents(node)
2956 p1, p2 = self.parents(node)
2957 if node != self.hash(text, p1, p2):
2957 if node != self.hash(text, p1, p2):
2958 # Clear the revision cache on hash failure. The revision cache
2958 # Clear the revision cache on hash failure. The revision cache
2959 # only stores the raw revision and clearing the cache does have
2959 # only stores the raw revision and clearing the cache does have
2960 # the side-effect that we won't have a cache hit when the raw
2960 # the side-effect that we won't have a cache hit when the raw
2961 # revision data is accessed. But this case should be rare and
2961 # revision data is accessed. But this case should be rare and
2962 # it is extra work to teach the cache about the hash
2962 # it is extra work to teach the cache about the hash
2963 # verification state.
2963 # verification state.
2964 if (
2964 if (
2965 self._inner._revisioncache
2965 self._inner._revisioncache
2966 and self._inner._revisioncache[0] == node
2966 and self._inner._revisioncache[0] == node
2967 ):
2967 ):
2968 self._inner._revisioncache = None
2968 self._inner._revisioncache = None
2969
2969
2970 revornode = rev
2970 revornode = rev
2971 if revornode is None:
2971 if revornode is None:
2972 revornode = templatefilters.short(hex(node))
2972 revornode = templatefilters.short(hex(node))
2973 raise error.RevlogError(
2973 raise error.RevlogError(
2974 _(b"integrity check failed on %s:%s")
2974 _(b"integrity check failed on %s:%s")
2975 % (self.display_id, pycompat.bytestr(revornode))
2975 % (self.display_id, pycompat.bytestr(revornode))
2976 )
2976 )
2977 except error.RevlogError:
2977 except error.RevlogError:
2978 if self.feature_config.censorable and storageutil.iscensoredtext(
2978 if self.feature_config.censorable and storageutil.iscensoredtext(
2979 text
2979 text
2980 ):
2980 ):
2981 raise error.CensoredNodeError(self.display_id, node, text)
2981 raise error.CensoredNodeError(self.display_id, node, text)
2982 raise
2982 raise
2983
2983
2984 @property
2984 @property
2985 def _split_index_file(self):
2985 def _split_index_file(self):
2986 """the path where to expect the index of an ongoing splitting operation
2986 """the path where to expect the index of an ongoing splitting operation
2987
2987
2988 The file will only exist if a splitting operation is in progress, but
2988 The file will only exist if a splitting operation is in progress, but
2989 it is always expected at the same location."""
2989 it is always expected at the same location."""
2990 parts = self.radix.split(b'/')
2990 parts = self.radix.split(b'/')
2991 if len(parts) > 1:
2991 if len(parts) > 1:
2992 # adds a '-s' suffix to the `data/` or `meta/` base
2992 # adds a '-s' suffix to the `data/` or `meta/` base
2993 head = parts[0] + b'-s'
2993 head = parts[0] + b'-s'
2994 mids = parts[1:-1]
2994 mids = parts[1:-1]
2995 tail = parts[-1] + b'.i'
2995 tail = parts[-1] + b'.i'
2996 pieces = [head] + mids + [tail]
2996 pieces = [head] + mids + [tail]
2997 return b'/'.join(pieces)
2997 return b'/'.join(pieces)
2998 else:
2998 else:
2999 # the revlog is stored at the root of the store (changelog or
2999 # the revlog is stored at the root of the store (changelog or
3000 # manifest), no risk of collision.
3000 # manifest), no risk of collision.
3001 return self.radix + b'.i.s'
3001 return self.radix + b'.i.s'
3002
3002
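# Illustrative sketch, not part of this module: the path rewriting done by the
# property above.
def toy_split_index_file(radix):
    parts = radix.split(b'/')
    if len(parts) > 1:
        return b'/'.join([parts[0] + b'-s'] + parts[1:-1] + [parts[-1] + b'.i'])
    return radix + b'.i.s'

# toy_split_index_file(b'data/foo.txt') == b'data-s/foo.txt.i'
# toy_split_index_file(b'00changelog') == b'00changelog.i.s'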
3003 def _enforceinlinesize(self, tr):
3003 def _enforceinlinesize(self, tr):
3004 """Check if the revlog is too big for inline and convert if so.
3004 """Check if the revlog is too big for inline and convert if so.
3005
3005
3006 This should be called after revisions are added to the revlog. If the
3006 This should be called after revisions are added to the revlog. If the
3007 revlog has grown too large to be an inline revlog, it will convert it
3007 revlog has grown too large to be an inline revlog, it will convert it
3008 to use multiple index and data files.
3008 to use multiple index and data files.
3009 """
3009 """
3010 tiprev = len(self) - 1
3010 tiprev = len(self) - 1
3011 total_size = self.start(tiprev) + self.length(tiprev)
3011 total_size = self.start(tiprev) + self.length(tiprev)
3012 if not self._inline or (self._may_inline and total_size < _maxinline):
3012 if not self._inline or (self._may_inline and total_size < _maxinline):
3013 return
3013 return
3014
3014
3015 if self._docket is not None:
3015 if self._docket is not None:
3016 msg = b"inline revlog should not have a docket"
3016 msg = b"inline revlog should not have a docket"
3017 raise error.ProgrammingError(msg)
3017 raise error.ProgrammingError(msg)
3018
3018
3019 # In the common case, we enforce inline size because the revlog has
3019 # In the common case, we enforce inline size because the revlog has
3020 # been appended to. In that case, it must have an initial offset
3020 # been appended to. In that case, it must have an initial offset
3021 # recorded in the transaction.
3021 # recorded in the transaction.
3022 troffset = tr.findoffset(self._inner.canonical_index_file)
3022 troffset = tr.findoffset(self._inner.canonical_index_file)
3023 pre_touched = troffset is not None
3023 pre_touched = troffset is not None
3024 if not pre_touched and self.target[0] != KIND_CHANGELOG:
3024 if not pre_touched and self.target[0] != KIND_CHANGELOG:
3025 raise error.RevlogError(
3025 raise error.RevlogError(
3026 _(b"%s not found in the transaction") % self._indexfile
3026 _(b"%s not found in the transaction") % self._indexfile
3027 )
3027 )
3028
3028
3029 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
3029 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
3030 tr.add(self._datafile, 0)
3030 tr.add(self._datafile, 0)
3031
3031
3032 new_index_file_path = None
3032 new_index_file_path = None
3033 old_index_file_path = self._indexfile
3033 old_index_file_path = self._indexfile
3034 new_index_file_path = self._split_index_file
3034 new_index_file_path = self._split_index_file
3035 opener = self.opener
3035 opener = self.opener
3036 weak_self = weakref.ref(self)
3036 weak_self = weakref.ref(self)
3037
3037
3038 # the "split" index replaces the real index when the transaction is
3039 # finalized
3039 # finalized
3040 def finalize_callback(tr):
3040 def finalize_callback(tr):
3041 opener.rename(
3041 opener.rename(
3042 new_index_file_path,
3042 new_index_file_path,
3043 old_index_file_path,
3043 old_index_file_path,
3044 checkambig=True,
3044 checkambig=True,
3045 )
3045 )
3046 maybe_self = weak_self()
3046 maybe_self = weak_self()
3047 if maybe_self is not None:
3047 if maybe_self is not None:
3048 maybe_self._indexfile = old_index_file_path
3048 maybe_self._indexfile = old_index_file_path
3049 maybe_self._inner.index_file = maybe_self._indexfile
3049 maybe_self._inner.index_file = maybe_self._indexfile
3050
3050
3051 def abort_callback(tr):
3051 def abort_callback(tr):
3052 maybe_self = weak_self()
3052 maybe_self = weak_self()
3053 if maybe_self is not None:
3053 if maybe_self is not None:
3054 maybe_self._indexfile = old_index_file_path
3054 maybe_self._indexfile = old_index_file_path
3055 maybe_self._inner.inline = True
3055 maybe_self._inner.inline = True
3056 maybe_self._inner.index_file = old_index_file_path
3056 maybe_self._inner.index_file = old_index_file_path
3057
3057
3058 tr.registertmp(new_index_file_path)
3058 tr.registertmp(new_index_file_path)
3059 # we use 001 here to make this happen after the finalisation of
3060 # the pending changelog write (using 000). Otherwise the two finalizers
3061 # would step over each other and delete the changelog.i file.
3061 # would step over each other and delete the changelog.i file.
3062 if self.target[1] is not None:
3062 if self.target[1] is not None:
3063 callback_id = b'001-revlog-split-%d-%s' % self.target
3063 callback_id = b'001-revlog-split-%d-%s' % self.target
3064 else:
3064 else:
3065 callback_id = b'001-revlog-split-%d' % self.target[0]
3065 callback_id = b'001-revlog-split-%d' % self.target[0]
3066 tr.addfinalize(callback_id, finalize_callback)
3066 tr.addfinalize(callback_id, finalize_callback)
3067 tr.addabort(callback_id, abort_callback)
3067 tr.addabort(callback_id, abort_callback)
3068
3068
3069 self._format_flags &= ~FLAG_INLINE_DATA
3069 self._format_flags &= ~FLAG_INLINE_DATA
3070 self._inner.split_inline(
3070 self._inner.split_inline(
3071 tr,
3071 tr,
3072 self._format_flags | self._format_version,
3072 self._format_flags | self._format_version,
3073 new_index_file_path=new_index_file_path,
3073 new_index_file_path=new_index_file_path,
3074 )
3074 )
3075
3075
3076 self._inline = False
3076 self._inline = False
3077 if new_index_file_path is not None:
3077 if new_index_file_path is not None:
3078 self._indexfile = new_index_file_path
3078 self._indexfile = new_index_file_path
3079
3079
3080 nodemaputil.setup_persistent_nodemap(tr, self)
3080 nodemaputil.setup_persistent_nodemap(tr, self)
3081
3081
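# Illustrative sketch (not part of revlog.py): the decision _enforceinlinesize()
# makes, assuming `rl` is an open revlog. The revlog is rewritten into separate
# index and data files only once it is inline and either is no longer allowed
# to be (`_may_inline` is False) or the combined index+data size reaches the
# module-level `_maxinline` cap.
def _needs_split_sketch(rl):
    tiprev = len(rl) - 1
    total_size = rl.start(tiprev) + rl.length(tiprev)
    return rl._inline and not (rl._may_inline and total_size < _maxinline)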
3082 def _nodeduplicatecallback(self, transaction, node):
3082 def _nodeduplicatecallback(self, transaction, node):
3083 """called when trying to add a node already stored."""
3083 """called when trying to add a node already stored."""
3084
3084
3085 @contextlib.contextmanager
3085 @contextlib.contextmanager
3086 def reading(self):
3086 def reading(self):
3087 with self._inner.reading():
3087 with self._inner.reading():
3088 yield
3088 yield
3089
3089
3090 @contextlib.contextmanager
3090 @contextlib.contextmanager
3091 def _writing(self, transaction):
3091 def _writing(self, transaction):
3092 if self._trypending:
3092 if self._trypending:
3093 msg = b'try to write in a `trypending` revlog: %s'
3093 msg = b'try to write in a `trypending` revlog: %s'
3094 msg %= self.display_id
3094 msg %= self.display_id
3095 raise error.ProgrammingError(msg)
3095 raise error.ProgrammingError(msg)
3096 if self._inner.is_writing:
3096 if self._inner.is_writing:
3097 yield
3097 yield
3098 else:
3098 else:
3099 data_end = None
3099 data_end = None
3100 sidedata_end = None
3100 sidedata_end = None
3101 if self._docket is not None:
3101 if self._docket is not None:
3102 data_end = self._docket.data_end
3102 data_end = self._docket.data_end
3103 sidedata_end = self._docket.sidedata_end
3103 sidedata_end = self._docket.sidedata_end
3104 with self._inner.writing(
3104 with self._inner.writing(
3105 transaction,
3105 transaction,
3106 data_end=data_end,
3106 data_end=data_end,
3107 sidedata_end=sidedata_end,
3107 sidedata_end=sidedata_end,
3108 ):
3108 ):
3109 yield
3109 yield
3110 if self._docket is not None:
3110 if self._docket is not None:
3111 self._write_docket(transaction)
3111 self._write_docket(transaction)
3112
3112
3113 @property
3113 @property
3114 def is_delaying(self):
3114 def is_delaying(self):
3115 return self._inner.is_delaying
3115 return self._inner.is_delaying
3116
3116
3117 def _write_docket(self, transaction):
3117 def _write_docket(self, transaction):
3118 """write the current docket on disk
3118 """write the current docket on disk
3119
3119
3120 Exists as a method to help the changelog implement transaction logic
3121
3121
3122 We could also imagine using the same transaction logic for all revlogs
3123 since dockets are cheap."""
3124 self._docket.write(transaction)
3124 self._docket.write(transaction)
3125
3125
3126 def addrevision(
3126 def addrevision(
3127 self,
3127 self,
3128 text,
3128 text,
3129 transaction,
3129 transaction,
3130 link,
3130 link,
3131 p1,
3131 p1,
3132 p2,
3132 p2,
3133 cachedelta=None,
3133 cachedelta=None,
3134 node=None,
3134 node=None,
3135 flags=REVIDX_DEFAULT_FLAGS,
3135 flags=REVIDX_DEFAULT_FLAGS,
3136 deltacomputer=None,
3136 deltacomputer=None,
3137 sidedata=None,
3137 sidedata=None,
3138 ):
3138 ):
3139 """add a revision to the log
3139 """add a revision to the log
3140
3140
3141 text - the revision data to add
3141 text - the revision data to add
3142 transaction - the transaction object used for rollback
3142 transaction - the transaction object used for rollback
3143 link - the linkrev data to add
3143 link - the linkrev data to add
3144 p1, p2 - the parent nodeids of the revision
3144 p1, p2 - the parent nodeids of the revision
3145 cachedelta - an optional precomputed delta
3145 cachedelta - an optional precomputed delta
3146 node - nodeid of revision; typically node is not specified, and it is
3146 node - nodeid of revision; typically node is not specified, and it is
3147 computed by default as hash(text, p1, p2), however subclasses might
3147 computed by default as hash(text, p1, p2), however subclasses might
3148 use a different hashing method (and override checkhash() in that case)
3149 flags - the known flags to set on the revision
3149 flags - the known flags to set on the revision
3150 deltacomputer - an optional deltacomputer instance shared between
3150 deltacomputer - an optional deltacomputer instance shared between
3151 multiple calls
3151 multiple calls
3152 """
3152 """
3153 if link == nullrev:
3153 if link == nullrev:
3154 raise error.RevlogError(
3154 raise error.RevlogError(
3155 _(b"attempted to add linkrev -1 to %s") % self.display_id
3155 _(b"attempted to add linkrev -1 to %s") % self.display_id
3156 )
3156 )
3157
3157
3158 if sidedata is None:
3158 if sidedata is None:
3159 sidedata = {}
3159 sidedata = {}
3160 elif sidedata and not self.feature_config.has_side_data:
3160 elif sidedata and not self.feature_config.has_side_data:
3161 raise error.ProgrammingError(
3161 raise error.ProgrammingError(
3162 _(b"trying to add sidedata to a revlog that does not support them")
3163 )
3163 )
3164
3164
3165 if flags:
3165 if flags:
3166 node = node or self.hash(text, p1, p2)
3166 node = node or self.hash(text, p1, p2)
3167
3167
3168 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3168 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3169
3169
3170 # If the flag processor modifies the revision data, ignore any provided
3170 # If the flag processor modifies the revision data, ignore any provided
3171 # cachedelta.
3171 # cachedelta.
3172 if rawtext != text:
3172 if rawtext != text:
3173 cachedelta = None
3173 cachedelta = None
3174
3174
3175 if len(rawtext) > _maxentrysize:
3175 if len(rawtext) > _maxentrysize:
3176 raise error.RevlogError(
3176 raise error.RevlogError(
3177 _(
3177 _(
3178 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3178 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3179 )
3179 )
3180 % (self.display_id, len(rawtext))
3180 % (self.display_id, len(rawtext))
3181 )
3181 )
3182
3182
3183 node = node or self.hash(rawtext, p1, p2)
3183 node = node or self.hash(rawtext, p1, p2)
3184 rev = self.index.get_rev(node)
3184 rev = self.index.get_rev(node)
3185 if rev is not None:
3185 if rev is not None:
3186 return rev
3186 return rev
3187
3187
3188 if validatehash:
3188 if validatehash:
3189 self.checkhash(rawtext, node, p1=p1, p2=p2)
3189 self.checkhash(rawtext, node, p1=p1, p2=p2)
3190
3190
3191 return self.addrawrevision(
3191 return self.addrawrevision(
3192 rawtext,
3192 rawtext,
3193 transaction,
3193 transaction,
3194 link,
3194 link,
3195 p1,
3195 p1,
3196 p2,
3196 p2,
3197 node,
3197 node,
3198 flags,
3198 flags,
3199 cachedelta=cachedelta,
3199 cachedelta=cachedelta,
3200 deltacomputer=deltacomputer,
3200 deltacomputer=deltacomputer,
3201 sidedata=sidedata,
3201 sidedata=sidedata,
3202 )
3202 )
3203
3203
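# Illustrative sketch (not part of revlog.py): a minimal addrevision() call.
# `rl`, `tr`, `linkrev`, `p1` and `p2` are assumptions here: an open revlog,
# a live transaction, the changelog revision this entry belongs to, and two
# existing parent nodeids (nullid for a root revision). The node defaults to
# hash(text, p1, p2) and the new revision number is returned.
new_rev = rl.addrevision(
    b'file content\n',  # revision data
    tr,                 # transaction used for rollback
    linkrev,            # linkrev to record
    p1,
    p2,
)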
3204 def addrawrevision(
3204 def addrawrevision(
3205 self,
3205 self,
3206 rawtext,
3206 rawtext,
3207 transaction,
3207 transaction,
3208 link,
3208 link,
3209 p1,
3209 p1,
3210 p2,
3210 p2,
3211 node,
3211 node,
3212 flags,
3212 flags,
3213 cachedelta=None,
3213 cachedelta=None,
3214 deltacomputer=None,
3214 deltacomputer=None,
3215 sidedata=None,
3215 sidedata=None,
3216 ):
3216 ):
3217 """add a raw revision with known flags, node and parents
3217 """add a raw revision with known flags, node and parents
3218 useful when reusing a revision not stored in this revlog (ex: received
3218 useful when reusing a revision not stored in this revlog (ex: received
3219 over wire, or read from an external bundle).
3219 over wire, or read from an external bundle).
3220 """
3220 """
3221 with self._writing(transaction):
3221 with self._writing(transaction):
3222 return self._addrevision(
3222 return self._addrevision(
3223 node,
3223 node,
3224 rawtext,
3224 rawtext,
3225 transaction,
3225 transaction,
3226 link,
3226 link,
3227 p1,
3227 p1,
3228 p2,
3228 p2,
3229 flags,
3229 flags,
3230 cachedelta,
3230 cachedelta,
3231 deltacomputer=deltacomputer,
3231 deltacomputer=deltacomputer,
3232 sidedata=sidedata,
3232 sidedata=sidedata,
3233 )
3233 )
3234
3234
3235 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3235 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3236 return self._inner.compress(data)
3236 return self._inner.compress(data)
3237
3237
3238 def decompress(self, data):
3238 def decompress(self, data):
3239 return self._inner.decompress(data)
3239 return self._inner.decompress(data)
3240
3240
3241 def _addrevision(
3241 def _addrevision(
3242 self,
3242 self,
3243 node,
3243 node,
3244 rawtext,
3244 rawtext,
3245 transaction,
3245 transaction,
3246 link,
3246 link,
3247 p1,
3247 p1,
3248 p2,
3248 p2,
3249 flags,
3249 flags,
3250 cachedelta,
3250 cachedelta,
3251 alwayscache=False,
3251 alwayscache=False,
3252 deltacomputer=None,
3252 deltacomputer=None,
3253 sidedata=None,
3253 sidedata=None,
3254 ):
3254 ):
3255 """internal function to add revisions to the log
3255 """internal function to add revisions to the log
3256
3256
3257 see addrevision for argument descriptions.
3257 see addrevision for argument descriptions.
3258
3258
3259 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3259 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3260
3260
3261 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3261 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3262 be used.
3262 be used.
3263
3263
3264 invariants:
3264 invariants:
3265 - rawtext is optional (can be None); if not set, cachedelta must be set.
3265 - rawtext is optional (can be None); if not set, cachedelta must be set.
3266 if both are set, they must correspond to each other.
3266 if both are set, they must correspond to each other.
3267 """
3267 """
3268 if node == self.nullid:
3268 if node == self.nullid:
3269 raise error.RevlogError(
3269 raise error.RevlogError(
3270 _(b"%s: attempt to add null revision") % self.display_id
3270 _(b"%s: attempt to add null revision") % self.display_id
3271 )
3271 )
3272 if (
3272 if (
3273 node == self.nodeconstants.wdirid
3273 node == self.nodeconstants.wdirid
3274 or node in self.nodeconstants.wdirfilenodeids
3274 or node in self.nodeconstants.wdirfilenodeids
3275 ):
3275 ):
3276 raise error.RevlogError(
3276 raise error.RevlogError(
3277 _(b"%s: attempt to add wdir revision") % self.display_id
3277 _(b"%s: attempt to add wdir revision") % self.display_id
3278 )
3278 )
3279 if not self._inner.is_writing:
3279 if not self._inner.is_writing:
3280 msg = b'adding revision outside `revlog._writing` context'
3280 msg = b'adding revision outside `revlog._writing` context'
3281 raise error.ProgrammingError(msg)
3281 raise error.ProgrammingError(msg)
3282
3282
3283 btext = [rawtext]
3283 btext = [rawtext]
3284
3284
3285 curr = len(self)
3285 curr = len(self)
3286 prev = curr - 1
3286 prev = curr - 1
3287
3287
3288 offset = self._get_data_offset(prev)
3288 offset = self._get_data_offset(prev)
3289
3289
3290 if self._concurrencychecker:
3290 if self._concurrencychecker:
3291 ifh, dfh, sdfh = self._inner._writinghandles
3291 ifh, dfh, sdfh = self._inner._writinghandles
3292 # XXX no checking for the sidedata file
3292 # XXX no checking for the sidedata file
3293 if self._inline:
3293 if self._inline:
3294 # offset is "as if" it were in the .d file, so we need to add on
3294 # offset is "as if" it were in the .d file, so we need to add on
3295 # the size of the entry metadata.
3295 # the size of the entry metadata.
3296 self._concurrencychecker(
3296 self._concurrencychecker(
3297 ifh, self._indexfile, offset + curr * self.index.entry_size
3297 ifh, self._indexfile, offset + curr * self.index.entry_size
3298 )
3298 )
3299 else:
3299 else:
3300 # Entries in the .i are a consistent size.
3300 # Entries in the .i are a consistent size.
3301 self._concurrencychecker(
3301 self._concurrencychecker(
3302 ifh, self._indexfile, curr * self.index.entry_size
3302 ifh, self._indexfile, curr * self.index.entry_size
3303 )
3303 )
3304 self._concurrencychecker(dfh, self._datafile, offset)
3304 self._concurrencychecker(dfh, self._datafile, offset)
3305
3305
3306 p1r, p2r = self.rev(p1), self.rev(p2)
3306 p1r, p2r = self.rev(p1), self.rev(p2)
3307
3307
3308 # full versions are inserted when the needed deltas
3308 # full versions are inserted when the needed deltas
3309 # become comparable to the uncompressed text
3309 # become comparable to the uncompressed text
3310 if rawtext is None:
3310 if rawtext is None:
3311 # need rawtext size, before changed by flag processors, which is
3311 # need rawtext size, before changed by flag processors, which is
3312 # the non-raw size. use revlog explicitly to avoid filelog's extra
3312 # the non-raw size. use revlog explicitly to avoid filelog's extra
3313 # logic that might remove metadata size.
3313 # logic that might remove metadata size.
3314 textlen = mdiff.patchedsize(
3314 textlen = mdiff.patchedsize(
3315 revlog.size(self, cachedelta[0]), cachedelta[1]
3315 revlog.size(self, cachedelta[0]), cachedelta[1]
3316 )
3316 )
3317 else:
3317 else:
3318 textlen = len(rawtext)
3318 textlen = len(rawtext)
3319
3319
3320 if deltacomputer is None:
3320 if deltacomputer is None:
3321 write_debug = None
3321 write_debug = None
3322 if self.delta_config.debug_delta:
3322 if self.delta_config.debug_delta:
3323 write_debug = transaction._report
3323 write_debug = transaction._report
3324 deltacomputer = deltautil.deltacomputer(
3324 deltacomputer = deltautil.deltacomputer(
3325 self, write_debug=write_debug
3325 self, write_debug=write_debug
3326 )
3326 )
3327
3327
3328 if cachedelta is not None and len(cachedelta) == 2:
3328 if cachedelta is not None and len(cachedelta) == 2:
3329 # If the cached delta has no information about how it should be
3329 # If the cached delta has no information about how it should be
3330 # reused, add the default reuse instruction according to the
3330 # reused, add the default reuse instruction according to the
3331 # revlog's configuration.
3331 # revlog's configuration.
3332 if (
3332 if (
3333 self.delta_config.general_delta
3333 self.delta_config.general_delta
3334 and self.delta_config.lazy_delta_base
3334 and self.delta_config.lazy_delta_base
3335 ):
3335 ):
3336 delta_base_reuse = DELTA_BASE_REUSE_TRY
3336 delta_base_reuse = DELTA_BASE_REUSE_TRY
3337 else:
3337 else:
3338 delta_base_reuse = DELTA_BASE_REUSE_NO
3338 delta_base_reuse = DELTA_BASE_REUSE_NO
3339 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3339 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3340
3340
3341 revinfo = revlogutils.revisioninfo(
3341 revinfo = revlogutils.revisioninfo(
3342 node,
3342 node,
3343 p1,
3343 p1,
3344 p2,
3344 p2,
3345 btext,
3345 btext,
3346 textlen,
3346 textlen,
3347 cachedelta,
3347 cachedelta,
3348 flags,
3348 flags,
3349 )
3349 )
3350
3350
3351 deltainfo = deltacomputer.finddeltainfo(revinfo)
3351 deltainfo = deltacomputer.finddeltainfo(revinfo)
3352
3352
3353 compression_mode = COMP_MODE_INLINE
3353 compression_mode = COMP_MODE_INLINE
3354 if self._docket is not None:
3354 if self._docket is not None:
3355 default_comp = self._docket.default_compression_header
3355 default_comp = self._docket.default_compression_header
3356 r = deltautil.delta_compression(default_comp, deltainfo)
3356 r = deltautil.delta_compression(default_comp, deltainfo)
3357 compression_mode, deltainfo = r
3357 compression_mode, deltainfo = r
3358
3358
3359 sidedata_compression_mode = COMP_MODE_INLINE
3359 sidedata_compression_mode = COMP_MODE_INLINE
3360 if sidedata and self.feature_config.has_side_data:
3360 if sidedata and self.feature_config.has_side_data:
3361 sidedata_compression_mode = COMP_MODE_PLAIN
3361 sidedata_compression_mode = COMP_MODE_PLAIN
3362 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3362 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3363 sidedata_offset = self._docket.sidedata_end
3363 sidedata_offset = self._docket.sidedata_end
3364 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3364 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3365 if (
3365 if (
3366 h != b'u'
3366 h != b'u'
3367 and comp_sidedata[0:1] != b'\0'
3367 and comp_sidedata[0:1] != b'\0'
3368 and len(comp_sidedata) < len(serialized_sidedata)
3368 and len(comp_sidedata) < len(serialized_sidedata)
3369 ):
3369 ):
3370 assert not h
3370 assert not h
3371 if (
3371 if (
3372 comp_sidedata[0:1]
3372 comp_sidedata[0:1]
3373 == self._docket.default_compression_header
3373 == self._docket.default_compression_header
3374 ):
3374 ):
3375 sidedata_compression_mode = COMP_MODE_DEFAULT
3375 sidedata_compression_mode = COMP_MODE_DEFAULT
3376 serialized_sidedata = comp_sidedata
3376 serialized_sidedata = comp_sidedata
3377 else:
3377 else:
3378 sidedata_compression_mode = COMP_MODE_INLINE
3378 sidedata_compression_mode = COMP_MODE_INLINE
3379 serialized_sidedata = comp_sidedata
3379 serialized_sidedata = comp_sidedata
3380 else:
3380 else:
3381 serialized_sidedata = b""
3381 serialized_sidedata = b""
3382 # Don't store the offset if the sidedata is empty, that way
3382 # Don't store the offset if the sidedata is empty, that way
3383 # we can easily detect empty sidedata, and it will be no different
3384 # from the ones we add manually.
3385 sidedata_offset = 0
3385 sidedata_offset = 0
3386
3386
3387 rank = RANK_UNKNOWN
3387 rank = RANK_UNKNOWN
3388 if self.feature_config.compute_rank:
3388 if self.feature_config.compute_rank:
3389 if (p1r, p2r) == (nullrev, nullrev):
3389 if (p1r, p2r) == (nullrev, nullrev):
3390 rank = 1
3390 rank = 1
3391 elif p1r != nullrev and p2r == nullrev:
3391 elif p1r != nullrev and p2r == nullrev:
3392 rank = 1 + self.fast_rank(p1r)
3392 rank = 1 + self.fast_rank(p1r)
3393 elif p1r == nullrev and p2r != nullrev:
3393 elif p1r == nullrev and p2r != nullrev:
3394 rank = 1 + self.fast_rank(p2r)
3394 rank = 1 + self.fast_rank(p2r)
3395 else: # merge node
3395 else: # merge node
3396 if rustdagop is not None and self.index.rust_ext_compat:
3396 if rustdagop is not None and self.index.rust_ext_compat:
3397 rank = rustdagop.rank(self.index, p1r, p2r)
3397 rank = rustdagop.rank(self.index, p1r, p2r)
3398 else:
3398 else:
3399 pmin, pmax = sorted((p1r, p2r))
3399 pmin, pmax = sorted((p1r, p2r))
3400 rank = 1 + self.fast_rank(pmax)
3400 rank = 1 + self.fast_rank(pmax)
3401 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3401 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3402
3402
3403 e = revlogutils.entry(
3403 e = revlogutils.entry(
3404 flags=flags,
3404 flags=flags,
3405 data_offset=offset,
3405 data_offset=offset,
3406 data_compressed_length=deltainfo.deltalen,
3406 data_compressed_length=deltainfo.deltalen,
3407 data_uncompressed_length=textlen,
3407 data_uncompressed_length=textlen,
3408 data_compression_mode=compression_mode,
3408 data_compression_mode=compression_mode,
3409 data_delta_base=deltainfo.base,
3409 data_delta_base=deltainfo.base,
3410 link_rev=link,
3410 link_rev=link,
3411 parent_rev_1=p1r,
3411 parent_rev_1=p1r,
3412 parent_rev_2=p2r,
3412 parent_rev_2=p2r,
3413 node_id=node,
3413 node_id=node,
3414 sidedata_offset=sidedata_offset,
3414 sidedata_offset=sidedata_offset,
3415 sidedata_compressed_length=len(serialized_sidedata),
3415 sidedata_compressed_length=len(serialized_sidedata),
3416 sidedata_compression_mode=sidedata_compression_mode,
3416 sidedata_compression_mode=sidedata_compression_mode,
3417 rank=rank,
3417 rank=rank,
3418 )
3418 )
3419
3419
3420 self.index.append(e)
3420 self.index.append(e)
3421 entry = self.index.entry_binary(curr)
3421 entry = self.index.entry_binary(curr)
3422 if curr == 0 and self._docket is None:
3422 if curr == 0 and self._docket is None:
3423 header = self._format_flags | self._format_version
3423 header = self._format_flags | self._format_version
3424 header = self.index.pack_header(header)
3424 header = self.index.pack_header(header)
3425 entry = header + entry
3425 entry = header + entry
3426 self._writeentry(
3426 self._writeentry(
3427 transaction,
3427 transaction,
3428 entry,
3428 entry,
3429 deltainfo.data,
3429 deltainfo.data,
3430 link,
3430 link,
3431 offset,
3431 offset,
3432 serialized_sidedata,
3432 serialized_sidedata,
3433 sidedata_offset,
3433 sidedata_offset,
3434 )
3434 )
3435
3435
3436 rawtext = btext[0]
3436 rawtext = btext[0]
3437
3437
3438 if alwayscache and rawtext is None:
3438 if alwayscache and rawtext is None:
3439 rawtext = deltacomputer.buildtext(revinfo)
3439 rawtext = deltacomputer.buildtext(revinfo)
3440
3440
3441 if type(rawtext) == bytes: # only accept immutable objects
3441 if type(rawtext) == bytes: # only accept immutable objects
3442 self._inner._revisioncache = (node, curr, rawtext)
3442 self._inner._revisioncache = (node, curr, rawtext)
3443 self._chainbasecache[curr] = deltainfo.chainbase
3443 self._chainbasecache[curr] = deltainfo.chainbase
3444 return curr
3444 return curr
3445
3445
3446 def _get_data_offset(self, prev):
3446 def _get_data_offset(self, prev):
3447 """Returns the current offset in the (in-transaction) data file.
3447 """Returns the current offset in the (in-transaction) data file.
3448 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
3449 file to store that information: since sidedata can be rewritten to the
3449 file to store that information: since sidedata can be rewritten to the
3450 end of the data file within a transaction, you can have cases where, for
3450 end of the data file within a transaction, you can have cases where, for
3451 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3451 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3452 to `n - 1`'s sidedata being written after `n`'s data.
3452 to `n - 1`'s sidedata being written after `n`'s data.
3453
3453
3454 TODO cache this in a docket file before getting out of experimental."""
3454 TODO cache this in a docket file before getting out of experimental."""
3455 if self._docket is None:
3455 if self._docket is None:
3456 return self.end(prev)
3456 return self.end(prev)
3457 else:
3457 else:
3458 return self._docket.data_end
3458 return self._docket.data_end
3459
3459
3460 def _writeentry(
3460 def _writeentry(
3461 self,
3461 self,
3462 transaction,
3462 transaction,
3463 entry,
3463 entry,
3464 data,
3464 data,
3465 link,
3465 link,
3466 offset,
3466 offset,
3467 sidedata,
3467 sidedata,
3468 sidedata_offset,
3468 sidedata_offset,
3469 ):
3469 ):
3470 # Files opened in a+ mode have inconsistent behavior on various
3470 # Files opened in a+ mode have inconsistent behavior on various
3471 # platforms. Windows requires that a file positioning call be made
3471 # platforms. Windows requires that a file positioning call be made
3472 # when the file handle transitions between reads and writes. See
3472 # when the file handle transitions between reads and writes. See
3473 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3473 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3474 # platforms, Python or the platform itself can be buggy. Some versions
3474 # platforms, Python or the platform itself can be buggy. Some versions
3475 # of Solaris have been observed to not append at the end of the file
3475 # of Solaris have been observed to not append at the end of the file
3476 # if the file was seeked to before the end. See issue4943 for more.
3476 # if the file was seeked to before the end. See issue4943 for more.
3477 #
3477 #
3478 # We work around this issue by inserting a seek() before writing.
3478 # We work around this issue by inserting a seek() before writing.
3479 # Note: This is likely not necessary on Python 3. However, because
3479 # Note: This is likely not necessary on Python 3. However, because
3480 # the file handle is reused for reads and may be seeked there, we need
3480 # the file handle is reused for reads and may be seeked there, we need
3481 # to be careful before changing this.
3481 # to be careful before changing this.
3482 index_end = data_end = sidedata_end = None
3482 index_end = data_end = sidedata_end = None
3483 if self._docket is not None:
3483 if self._docket is not None:
3484 index_end = self._docket.index_end
3484 index_end = self._docket.index_end
3485 data_end = self._docket.data_end
3485 data_end = self._docket.data_end
3486 sidedata_end = self._docket.sidedata_end
3486 sidedata_end = self._docket.sidedata_end
3487
3487
3488 files_end = self._inner.write_entry(
3488 files_end = self._inner.write_entry(
3489 transaction,
3489 transaction,
3490 entry,
3490 entry,
3491 data,
3491 data,
3492 link,
3492 link,
3493 offset,
3493 offset,
3494 sidedata,
3494 sidedata,
3495 sidedata_offset,
3495 sidedata_offset,
3496 index_end,
3496 index_end,
3497 data_end,
3497 data_end,
3498 sidedata_end,
3498 sidedata_end,
3499 )
3499 )
3500 self._enforceinlinesize(transaction)
3500 self._enforceinlinesize(transaction)
3501 if self._docket is not None:
3501 if self._docket is not None:
3502 self._docket.index_end = files_end[0]
3502 self._docket.index_end = files_end[0]
3503 self._docket.data_end = files_end[1]
3503 self._docket.data_end = files_end[1]
3504 self._docket.sidedata_end = files_end[2]
3504 self._docket.sidedata_end = files_end[2]
3505
3505
3506 nodemaputil.setup_persistent_nodemap(transaction, self)
3506 nodemaputil.setup_persistent_nodemap(transaction, self)
3507
3507
3508 def addgroup(
3508 def addgroup(
3509 self,
3509 self,
3510 deltas,
3510 deltas,
3511 linkmapper,
3511 linkmapper,
3512 transaction,
3512 transaction,
3513 alwayscache=False,
3513 alwayscache=False,
3514 addrevisioncb=None,
3514 addrevisioncb=None,
3515 duplicaterevisioncb=None,
3515 duplicaterevisioncb=None,
3516 debug_info=None,
3516 debug_info=None,
3517 delta_base_reuse_policy=None,
3517 delta_base_reuse_policy=None,
3518 ):
3518 ):
3519 """
3519 """
3520 add a delta group
3520 add a delta group
3521
3521
3522 given a set of deltas, add them to the revision log. the
3522 given a set of deltas, add them to the revision log. the
3523 first delta is against its parent, which should be in our
3523 first delta is against its parent, which should be in our
3524 log, the rest are against the previous delta.
3524 log, the rest are against the previous delta.
3525
3525
3526 If ``addrevisioncb`` is defined, it will be called with arguments of
3526 If ``addrevisioncb`` is defined, it will be called with arguments of
3527 this revlog and the node that was added.
3527 this revlog and the node that was added.
3528 """
3528 """
3529
3529
3530 if self._adding_group:
3530 if self._adding_group:
3531 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3531 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3532
3532
3533 # read the default delta-base reuse policy from revlog config if the
3533 # read the default delta-base reuse policy from revlog config if the
3534 # group did not specify one.
3534 # group did not specify one.
3535 if delta_base_reuse_policy is None:
3535 if delta_base_reuse_policy is None:
3536 if (
3536 if (
3537 self.delta_config.general_delta
3537 self.delta_config.general_delta
3538 and self.delta_config.lazy_delta_base
3538 and self.delta_config.lazy_delta_base
3539 ):
3539 ):
3540 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3540 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3541 else:
3541 else:
3542 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3542 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3543
3543
3544 self._adding_group = True
3544 self._adding_group = True
3545 empty = True
3545 empty = True
3546 try:
3546 try:
3547 with self._writing(transaction):
3547 with self._writing(transaction):
3548 write_debug = None
3548 write_debug = None
3549 if self.delta_config.debug_delta:
3549 if self.delta_config.debug_delta:
3550 write_debug = transaction._report
3550 write_debug = transaction._report
3551 deltacomputer = deltautil.deltacomputer(
3551 deltacomputer = deltautil.deltacomputer(
3552 self,
3552 self,
3553 write_debug=write_debug,
3553 write_debug=write_debug,
3554 debug_info=debug_info,
3554 debug_info=debug_info,
3555 )
3555 )
3556 # loop through our set of deltas
3556 # loop through our set of deltas
3557 for data in deltas:
3557 for data in deltas:
3558 (
3558 (
3559 node,
3559 node,
3560 p1,
3560 p1,
3561 p2,
3561 p2,
3562 linknode,
3562 linknode,
3563 deltabase,
3563 deltabase,
3564 delta,
3564 delta,
3565 flags,
3565 flags,
3566 sidedata,
3566 sidedata,
3567 ) = data
3567 ) = data
3568 link = linkmapper(linknode)
3568 link = linkmapper(linknode)
3569 flags = flags or REVIDX_DEFAULT_FLAGS
3569 flags = flags or REVIDX_DEFAULT_FLAGS
3570
3570
3571 rev = self.index.get_rev(node)
3571 rev = self.index.get_rev(node)
3572 if rev is not None:
3572 if rev is not None:
3573 # this can happen if two branches make the same change
3573 # this can happen if two branches make the same change
3574 self._nodeduplicatecallback(transaction, rev)
3574 self._nodeduplicatecallback(transaction, rev)
3575 if duplicaterevisioncb:
3575 if duplicaterevisioncb:
3576 duplicaterevisioncb(self, rev)
3576 duplicaterevisioncb(self, rev)
3577 empty = False
3577 empty = False
3578 continue
3578 continue
3579
3579
3580 for p in (p1, p2):
3580 for p in (p1, p2):
3581 if not self.index.has_node(p):
3581 if not self.index.has_node(p):
3582 raise error.LookupError(
3582 raise error.LookupError(
3583 p, self.radix, _(b'unknown parent')
3583 p, self.radix, _(b'unknown parent')
3584 )
3584 )
3585
3585
3586 if not self.index.has_node(deltabase):
3586 if not self.index.has_node(deltabase):
3587 raise error.LookupError(
3587 raise error.LookupError(
3588 deltabase, self.display_id, _(b'unknown delta base')
3588 deltabase, self.display_id, _(b'unknown delta base')
3589 )
3589 )
3590
3590
3591 baserev = self.rev(deltabase)
3591 baserev = self.rev(deltabase)
3592
3592
3593 if baserev != nullrev and self.iscensored(baserev):
3593 if baserev != nullrev and self.iscensored(baserev):
3594 # if base is censored, delta must be full replacement in a
3594 # if base is censored, delta must be full replacement in a
3595 # single patch operation
3595 # single patch operation
3596 hlen = struct.calcsize(b">lll")
3596 hlen = struct.calcsize(b">lll")
3597 oldlen = self.rawsize(baserev)
3597 oldlen = self.rawsize(baserev)
3598 newlen = len(delta) - hlen
3598 newlen = len(delta) - hlen
3599 if delta[:hlen] != mdiff.replacediffheader(
3599 if delta[:hlen] != mdiff.replacediffheader(
3600 oldlen, newlen
3600 oldlen, newlen
3601 ):
3601 ):
3602 raise error.CensoredBaseError(
3602 raise error.CensoredBaseError(
3603 self.display_id, self.node(baserev)
3603 self.display_id, self.node(baserev)
3604 )
3604 )
3605
3605
3606 if not flags and self._peek_iscensored(baserev, delta):
3606 if not flags and self._peek_iscensored(baserev, delta):
3607 flags |= REVIDX_ISCENSORED
3607 flags |= REVIDX_ISCENSORED
3608
3608
3609 # We assume consumers of addrevisioncb will want to retrieve
3609 # We assume consumers of addrevisioncb will want to retrieve
3610 # the added revision, which will require a call to
3610 # the added revision, which will require a call to
3611 # revision(). revision() will fast path if there is a cache
3611 # revision(). revision() will fast path if there is a cache
3612 # hit. So, we tell _addrevision() to always cache in this case.
3612 # hit. So, we tell _addrevision() to always cache in this case.
3613 # We're only using addgroup() in the context of changegroup
3613 # We're only using addgroup() in the context of changegroup
3614 # generation so the revision data can always be handled as raw
3614 # generation so the revision data can always be handled as raw
3615 # by the flagprocessor.
3615 # by the flagprocessor.
3616 rev = self._addrevision(
3616 rev = self._addrevision(
3617 node,
3617 node,
3618 None,
3618 None,
3619 transaction,
3619 transaction,
3620 link,
3620 link,
3621 p1,
3621 p1,
3622 p2,
3622 p2,
3623 flags,
3623 flags,
3624 (baserev, delta, delta_base_reuse_policy),
3624 (baserev, delta, delta_base_reuse_policy),
3625 alwayscache=alwayscache,
3625 alwayscache=alwayscache,
3626 deltacomputer=deltacomputer,
3626 deltacomputer=deltacomputer,
3627 sidedata=sidedata,
3627 sidedata=sidedata,
3628 )
3628 )
3629
3629
3630 if addrevisioncb:
3630 if addrevisioncb:
3631 addrevisioncb(self, rev)
3631 addrevisioncb(self, rev)
3632 empty = False
3632 empty = False
3633 finally:
3633 finally:
3634 self._adding_group = False
3634 self._adding_group = False
3635 return not empty
3635 return not empty
3636
3636
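# Illustrative sketch (not part of revlog.py): the shape of each item that
# addgroup() unpacks from `deltas`. The names below (`node`, `p1`, ...) are
# placeholders for 20-byte binary ids and raw bytes supplied by the caller,
# typically while applying a changegroup.
delta_entry = (
    node,       # nodeid of the revision being added
    p1,         # first parent nodeid
    p2,         # second parent nodeid
    linknode,   # node passed through `linkmapper` to compute the linkrev
    deltabase,  # nodeid the delta applies against
    delta,      # binary delta payload
    flags,      # revision flags, 0 means REVIDX_DEFAULT_FLAGS
    sidedata,   # sidedata mapping, may be empty
)
added = rl.addgroup(iter([delta_entry]), linkmapper, tr)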
3637 def iscensored(self, rev):
3637 def iscensored(self, rev):
3638 """Check if a file revision is censored."""
3638 """Check if a file revision is censored."""
3639 if not self.feature_config.censorable:
3639 if not self.feature_config.censorable:
3640 return False
3640 return False
3641
3641
3642 return self.flags(rev) & REVIDX_ISCENSORED
3642 return self.flags(rev) & REVIDX_ISCENSORED
3643
3643
3644 def _peek_iscensored(self, baserev, delta):
3644 def _peek_iscensored(self, baserev, delta):
3645 """Quickly check if a delta produces a censored revision."""
3645 """Quickly check if a delta produces a censored revision."""
3646 if not self.feature_config.censorable:
3646 if not self.feature_config.censorable:
3647 return False
3647 return False
3648
3648
3649 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3649 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3650
3650
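# Illustrative sketch (not part of revlog.py): what the two helpers above
# reduce to for a censorable filelog. `rl` and `rev` are assumptions; a
# censored revision is simply one whose index entry carries the
# REVIDX_ISCENSORED flag.
is_censored = bool(rl.flags(rev) & REVIDX_ISCENSORED)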
3651 def getstrippoint(self, minlink):
3651 def getstrippoint(self, minlink):
3652 """find the minimum rev that must be stripped to strip the linkrev
3652 """find the minimum rev that must be stripped to strip the linkrev
3653
3653
3654 Returns a tuple containing the minimum rev and a set of all revs that
3654 Returns a tuple containing the minimum rev and a set of all revs that
3655 have linkrevs that will be broken by this strip.
3655 have linkrevs that will be broken by this strip.
3656 """
3656 """
3657 return storageutil.resolvestripinfo(
3657 return storageutil.resolvestripinfo(
3658 minlink,
3658 minlink,
3659 len(self) - 1,
3659 len(self) - 1,
3660 self.headrevs(),
3660 self.headrevs(),
3661 self.linkrev,
3661 self.linkrev,
3662 self.parentrevs,
3662 self.parentrevs,
3663 )
3663 )
3664
3664
3665 def strip(self, minlink, transaction):
3665 def strip(self, minlink, transaction):
3666 """truncate the revlog on the first revision with a linkrev >= minlink
3666 """truncate the revlog on the first revision with a linkrev >= minlink
3667
3667
3668 This function is called when we're stripping revision minlink and
3668 This function is called when we're stripping revision minlink and
3669 its descendants from the repository.
3669 its descendants from the repository.
3670
3670
3671 We have to remove all revisions with linkrev >= minlink, because
3671 We have to remove all revisions with linkrev >= minlink, because
3672 the equivalent changelog revisions will be renumbered after the
3672 the equivalent changelog revisions will be renumbered after the
3673 strip.
3673 strip.
3674
3674
3675 So we truncate the revlog on the first of these revisions, and
3675 So we truncate the revlog on the first of these revisions, and
3676 trust that the caller has saved the revisions that shouldn't be
3676 trust that the caller has saved the revisions that shouldn't be
3677 removed and that it'll re-add them after this truncation.
3677 removed and that it'll re-add them after this truncation.
3678 """
3678 """
3679 if len(self) == 0:
3679 if len(self) == 0:
3680 return
3680 return
3681
3681
3682 rev, _ = self.getstrippoint(minlink)
3682 rev, _ = self.getstrippoint(minlink)
3683 if rev == len(self):
3683 if rev == len(self):
3684 return
3684 return
3685
3685
3686 # first truncate the files on disk
3686 # first truncate the files on disk
3687 data_end = self.start(rev)
3687 data_end = self.start(rev)
3688 if not self._inline:
3688 if not self._inline:
3689 transaction.add(self._datafile, data_end)
3689 transaction.add(self._datafile, data_end)
3690 end = rev * self.index.entry_size
3690 end = rev * self.index.entry_size
3691 else:
3691 else:
3692 end = data_end + (rev * self.index.entry_size)
3692 end = data_end + (rev * self.index.entry_size)
3693
3693
3694 if self._sidedatafile:
3694 if self._sidedatafile:
3695 sidedata_end = self.sidedata_cut_off(rev)
3695 sidedata_end = self.sidedata_cut_off(rev)
3696 transaction.add(self._sidedatafile, sidedata_end)
3696 transaction.add(self._sidedatafile, sidedata_end)
3697
3697
3698 transaction.add(self._indexfile, end)
3698 transaction.add(self._indexfile, end)
3699 if self._docket is not None:
3699 if self._docket is not None:
3700 # XXX we could leverage the docket while stripping. However it is
3701 # not powerful enough at the time of this comment
3702 self._docket.index_end = end
3702 self._docket.index_end = end
3703 self._docket.data_end = data_end
3703 self._docket.data_end = data_end
3704 self._docket.sidedata_end = sidedata_end
3704 self._docket.sidedata_end = sidedata_end
3705 self._docket.write(transaction, stripping=True)
3705 self._docket.write(transaction, stripping=True)
3706
3706
3707 # then reset internal state in memory to forget those revisions
3707 # then reset internal state in memory to forget those revisions
3708 self._chaininfocache = util.lrucachedict(500)
3708 self._chaininfocache = util.lrucachedict(500)
3709 self._inner.clear_cache()
3709 self._inner.clear_cache()
3710
3710
3711 del self.index[rev:-1]
3711 del self.index[rev:-1]
3712
3712
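# Illustrative sketch (not part of revlog.py): the usual pairing of
# getstrippoint() and strip(), with `rl`, `minlink` and `tr` as assumptions.
# The caller is expected to save the revisions reported as broken and re-add
# them after the truncation, as the docstring above describes.
striprev, brokenrevs = rl.getstrippoint(minlink)
if striprev < len(rl):
    rl.strip(minlink, tr)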
3713 def checksize(self):
3713 def checksize(self):
3714 """Check size of index and data files
3714 """Check size of index and data files
3715
3715
3716 return a (dd, di) tuple.
3716 return a (dd, di) tuple.
3717 - dd: extra bytes for the "data" file
3717 - dd: extra bytes for the "data" file
3718 - di: extra bytes for the "index" file
3718 - di: extra bytes for the "index" file
3719
3719
3720 A healthy revlog will return (0, 0).
3720 A healthy revlog will return (0, 0).
3721 """
3721 """
3722 expected = 0
3722 expected = 0
3723 if len(self):
3723 if len(self):
3724 expected = max(0, self.end(len(self) - 1))
3724 expected = max(0, self.end(len(self) - 1))
3725
3725
3726 try:
3726 try:
3727 with self._datafp() as f:
3727 with self._datafp() as f:
3728 f.seek(0, io.SEEK_END)
3728 f.seek(0, io.SEEK_END)
3729 actual = f.tell()
3729 actual = f.tell()
3730 dd = actual - expected
3730 dd = actual - expected
3731 except FileNotFoundError:
3731 except FileNotFoundError:
3732 dd = 0
3732 dd = 0
3733
3733
3734 try:
3734 try:
3735 f = self.opener(self._indexfile)
3735 f = self.opener(self._indexfile)
3736 f.seek(0, io.SEEK_END)
3736 f.seek(0, io.SEEK_END)
3737 actual = f.tell()
3737 actual = f.tell()
3738 f.close()
3738 f.close()
3739 s = self.index.entry_size
3739 s = self.index.entry_size
3740 i = max(0, actual // s)
3740 i = max(0, actual // s)
3741 di = actual - (i * s)
3741 di = actual - (i * s)
3742 if self._inline:
3742 if self._inline:
3743 databytes = 0
3743 databytes = 0
3744 for r in self:
3744 for r in self:
3745 databytes += max(0, self.length(r))
3745 databytes += max(0, self.length(r))
3746 dd = 0
3746 dd = 0
3747 di = actual - len(self) * s - databytes
3747 di = actual - len(self) * s - databytes
3748 except FileNotFoundError:
3748 except FileNotFoundError:
3749 di = 0
3749 di = 0
3750
3750
3751 return (dd, di)
3751 return (dd, di)
3752
3752
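# Illustrative sketch (not part of revlog.py): interpreting checksize() for
# some revlog `rl` (an assumption). Non-zero values mean the data or index
# file on disk is larger than what the index entries account for, e.g. after
# an interrupted transaction.
dd, di = rl.checksize()
if dd or di:
    print('data file has %d extra bytes, index file has %d extra bytes' % (dd, di))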
3753 def files(self):
3753 def files(self):
3754 """return list of files that compose this revlog"""
3754 """return list of files that compose this revlog"""
3755 res = [self._indexfile]
3755 res = [self._indexfile]
3756 if self._docket_file is None:
3756 if self._docket_file is None:
3757 if not self._inline:
3757 if not self._inline:
3758 res.append(self._datafile)
3758 res.append(self._datafile)
3759 else:
3759 else:
3760 res.append(self._docket_file)
3760 res.append(self._docket_file)
3761 res.extend(self._docket.old_index_filepaths(include_empty=False))
3761 res.extend(self._docket.old_index_filepaths(include_empty=False))
3762 if self._docket.data_end:
3762 if self._docket.data_end:
3763 res.append(self._datafile)
3763 res.append(self._datafile)
3764 res.extend(self._docket.old_data_filepaths(include_empty=False))
3764 res.extend(self._docket.old_data_filepaths(include_empty=False))
3765 if self._docket.sidedata_end:
3765 if self._docket.sidedata_end:
3766 res.append(self._sidedatafile)
3766 res.append(self._sidedatafile)
3767 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3767 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3768 return res
3768 return res
3769
3769
3770 def emitrevisions(
3770 def emitrevisions(
3771 self,
3771 self,
3772 nodes,
3772 nodes,
3773 nodesorder=None,
3773 nodesorder=None,
3774 revisiondata=False,
3774 revisiondata=False,
3775 assumehaveparentrevisions=False,
3775 assumehaveparentrevisions=False,
3776 deltamode=repository.CG_DELTAMODE_STD,
3776 deltamode=repository.CG_DELTAMODE_STD,
3777 sidedata_helpers=None,
3777 sidedata_helpers=None,
3778 debug_info=None,
3778 debug_info=None,
3779 ):
3779 ):
3780 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3780 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3781 raise error.ProgrammingError(
3781 raise error.ProgrammingError(
3782 b'unhandled value for nodesorder: %s' % nodesorder
3782 b'unhandled value for nodesorder: %s' % nodesorder
3783 )
3783 )
3784
3784
3785 if nodesorder is None and not self.delta_config.general_delta:
3785 if nodesorder is None and not self.delta_config.general_delta:
3786 nodesorder = b'storage'
3786 nodesorder = b'storage'
3787
3787
3788 if (
3788 if (
3789 not self._storedeltachains
3789 not self._storedeltachains
3790 and deltamode != repository.CG_DELTAMODE_PREV
3790 and deltamode != repository.CG_DELTAMODE_PREV
3791 ):
3791 ):
3792 deltamode = repository.CG_DELTAMODE_FULL
3792 deltamode = repository.CG_DELTAMODE_FULL
3793
3793
3794 return storageutil.emitrevisions(
3794 return storageutil.emitrevisions(
3795 self,
3795 self,
3796 nodes,
3796 nodes,
3797 nodesorder,
3797 nodesorder,
3798 revlogrevisiondelta,
3798 revlogrevisiondelta,
3799 deltaparentfn=self.deltaparent,
3799 deltaparentfn=self.deltaparent,
3800 candeltafn=self._candelta,
3800 candeltafn=self._candelta,
3801 rawsizefn=self.rawsize,
3801 rawsizefn=self.rawsize,
3802 revdifffn=self.revdiff,
3802 revdifffn=self.revdiff,
3803 flagsfn=self.flags,
3803 flagsfn=self.flags,
3804 deltamode=deltamode,
3804 deltamode=deltamode,
3805 revisiondata=revisiondata,
3805 revisiondata=revisiondata,
3806 assumehaveparentrevisions=assumehaveparentrevisions,
3806 assumehaveparentrevisions=assumehaveparentrevisions,
3807 sidedata_helpers=sidedata_helpers,
3807 sidedata_helpers=sidedata_helpers,
3808 debug_info=debug_info,
3808 debug_info=debug_info,
3809 )
3809 )
3810
3810
3811 DELTAREUSEALWAYS = b'always'
3811 DELTAREUSEALWAYS = b'always'
3812 DELTAREUSESAMEREVS = b'samerevs'
3812 DELTAREUSESAMEREVS = b'samerevs'
3813 DELTAREUSENEVER = b'never'
3813 DELTAREUSENEVER = b'never'
3814
3814
3815 DELTAREUSEFULLADD = b'fulladd'
3815 DELTAREUSEFULLADD = b'fulladd'
3816
3816
3817 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3817 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3818
3818
3819 def clone(
3819 def clone(
3820 self,
3820 self,
3821 tr,
3821 tr,
3822 destrevlog,
3822 destrevlog,
3823 addrevisioncb=None,
3823 addrevisioncb=None,
3824 deltareuse=DELTAREUSESAMEREVS,
3824 deltareuse=DELTAREUSESAMEREVS,
3825 forcedeltabothparents=None,
3825 forcedeltabothparents=None,
3826 sidedata_helpers=None,
3826 sidedata_helpers=None,
3827 ):
3827 ):
3828 """Copy this revlog to another, possibly with format changes.
3828 """Copy this revlog to another, possibly with format changes.
3829
3829
3830 The destination revlog will contain the same revisions and nodes.
3830 The destination revlog will contain the same revisions and nodes.
3831 However, it may not be bit-for-bit identical due to e.g. delta encoding
3831 However, it may not be bit-for-bit identical due to e.g. delta encoding
3832 differences.
3832 differences.
3833
3833
3834 The ``deltareuse`` argument controls how deltas from the existing revlog
3835 are preserved in the destination revlog. The argument can have the
3835 are preserved in the destination revlog. The argument can have the
3836 following values:
3836 following values:
3837
3837
3838 DELTAREUSEALWAYS
3838 DELTAREUSEALWAYS
3839 Deltas will always be reused (if possible), even if the destination
3839 Deltas will always be reused (if possible), even if the destination
3840 revlog would not select the same revisions for the delta. This is the
3840 revlog would not select the same revisions for the delta. This is the
3841 fastest mode of operation.
3841 fastest mode of operation.
3842 DELTAREUSESAMEREVS
3842 DELTAREUSESAMEREVS
3843 Deltas will be reused if the destination revlog would pick the same
3843 Deltas will be reused if the destination revlog would pick the same
3844 revisions for the delta. This mode strikes a balance between speed
3844 revisions for the delta. This mode strikes a balance between speed
3845 and optimization.
3845 and optimization.
3846 DELTAREUSENEVER
3846 DELTAREUSENEVER
3847 Deltas will never be reused. This is the slowest mode of execution.
3847 Deltas will never be reused. This is the slowest mode of execution.
3848 This mode can be used to recompute deltas (e.g. if the diff/delta
3848 This mode can be used to recompute deltas (e.g. if the diff/delta
3849 algorithm changes).
3849 algorithm changes).
3850 DELTAREUSEFULLADD
3850 DELTAREUSEFULLADD
3851 Revisions will be re-added as if they were new content. This is
3852 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3853 e.g. large file detection and handling.
3854
3854
3855 Delta computation can be slow, so the choice of delta reuse policy can
3855 Delta computation can be slow, so the choice of delta reuse policy can
3856 significantly affect run time.
3856 significantly affect run time.
3857
3857
3858 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3858 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3859 two extremes. Deltas will be reused if they are appropriate. But if the
3859 two extremes. Deltas will be reused if they are appropriate. But if the
3860 delta could choose a better revision, it will do so. This means if you
3860 delta could choose a better revision, it will do so. This means if you
3861 are converting a non-generaldelta revlog to a generaldelta revlog,
3861 are converting a non-generaldelta revlog to a generaldelta revlog,
3862 deltas will be recomputed if the delta's parent isn't a parent of the
3862 deltas will be recomputed if the delta's parent isn't a parent of the
3863 revision.
3863 revision.
3864
3864
3865 In addition to the delta policy, the ``forcedeltabothparents``
3865 In addition to the delta policy, the ``forcedeltabothparents``
3866 argument controls whether deltas are forcibly computed against both parents
3867 for merges. By default, the destination revlog's existing setting is used.
3868
3868
3869 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3869 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3870 `sidedata_helpers`.
3870 `sidedata_helpers`.
3871 """
3871 """
3872 if deltareuse not in self.DELTAREUSEALL:
3872 if deltareuse not in self.DELTAREUSEALL:
3873 raise ValueError(
3873 raise ValueError(
3874 _(b'value for deltareuse invalid: %s') % deltareuse
3874 _(b'value for deltareuse invalid: %s') % deltareuse
3875 )
3875 )
3876
3876
3877 if len(destrevlog):
3877 if len(destrevlog):
3878 raise ValueError(_(b'destination revlog is not empty'))
3878 raise ValueError(_(b'destination revlog is not empty'))
3879
3879
3880 if getattr(self, 'filteredrevs', None):
3880 if getattr(self, 'filteredrevs', None):
3881 raise ValueError(_(b'source revlog has filtered revisions'))
3881 raise ValueError(_(b'source revlog has filtered revisions'))
3882 if getattr(destrevlog, 'filteredrevs', None):
3882 if getattr(destrevlog, 'filteredrevs', None):
3883 raise ValueError(_(b'destination revlog has filtered revisions'))
3883 raise ValueError(_(b'destination revlog has filtered revisions'))
3884
3884
3885 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3886 # if possible.
3886 # if possible.
3887 old_delta_config = destrevlog.delta_config
3887 old_delta_config = destrevlog.delta_config
3888 destrevlog.delta_config = destrevlog.delta_config.copy()
3888 destrevlog.delta_config = destrevlog.delta_config.copy()
3889
3889
3890 try:
3890 try:
3891 if deltareuse == self.DELTAREUSEALWAYS:
3891 if deltareuse == self.DELTAREUSEALWAYS:
3892 destrevlog.delta_config.lazy_delta_base = True
3892 destrevlog.delta_config.lazy_delta_base = True
3893 destrevlog.delta_config.lazy_delta = True
3893 destrevlog.delta_config.lazy_delta = True
3894 elif deltareuse == self.DELTAREUSESAMEREVS:
3894 elif deltareuse == self.DELTAREUSESAMEREVS:
3895 destrevlog.delta_config.lazy_delta_base = False
3895 destrevlog.delta_config.lazy_delta_base = False
3896 destrevlog.delta_config.lazy_delta = True
3896 destrevlog.delta_config.lazy_delta = True
3897 elif deltareuse == self.DELTAREUSENEVER:
3897 elif deltareuse == self.DELTAREUSENEVER:
3898 destrevlog.delta_config.lazy_delta_base = False
3898 destrevlog.delta_config.lazy_delta_base = False
3899 destrevlog.delta_config.lazy_delta = False
3899 destrevlog.delta_config.lazy_delta = False
3900
3900
3901 delta_both_parents = (
3901 delta_both_parents = (
3902 forcedeltabothparents or old_delta_config.delta_both_parents
3902 forcedeltabothparents or old_delta_config.delta_both_parents
3903 )
3903 )
3904 destrevlog.delta_config.delta_both_parents = delta_both_parents
3904 destrevlog.delta_config.delta_both_parents = delta_both_parents
3905
3905
3906 with self.reading(), destrevlog._writing(tr):
3906 with self.reading(), destrevlog._writing(tr):
3907 self._clone(
3907 self._clone(
3908 tr,
3908 tr,
3909 destrevlog,
3909 destrevlog,
3910 addrevisioncb,
3910 addrevisioncb,
3911 deltareuse,
3911 deltareuse,
3912 forcedeltabothparents,
3912 forcedeltabothparents,
3913 sidedata_helpers,
3913 sidedata_helpers,
3914 )
3914 )
3915
3915
3916 finally:
3916 finally:
3917 destrevlog.delta_config = old_delta_config
3917 destrevlog.delta_config = old_delta_config
3918
3918
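# Illustrative sketch (not part of revlog.py): driving clone() with an explicit
# delta-reuse policy. `src`, `dst` and `tr` are assumptions (source revlog,
# empty destination revlog, live transaction). DELTAREUSESAMEREVS keeps an
# existing delta only when the destination would have picked the same base
# anyway.
src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)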
3919 def _clone(
3919 def _clone(
3920 self,
3920 self,
3921 tr,
3921 tr,
3922 destrevlog,
3922 destrevlog,
3923 addrevisioncb,
3923 addrevisioncb,
3924 deltareuse,
3924 deltareuse,
3925 forcedeltabothparents,
3925 forcedeltabothparents,
3926 sidedata_helpers,
3926 sidedata_helpers,
3927 ):
3927 ):
3928 """perform the core duty of `revlog.clone` after parameter processing"""
3928 """perform the core duty of `revlog.clone` after parameter processing"""
3929 write_debug = None
3929 write_debug = None
3930 if self.delta_config.debug_delta:
3930 if self.delta_config.debug_delta:
3931 write_debug = tr._report
3931 write_debug = tr._report
3932 deltacomputer = deltautil.deltacomputer(
3932 deltacomputer = deltautil.deltacomputer(
3933 destrevlog,
3933 destrevlog,
3934 write_debug=write_debug,
3934 write_debug=write_debug,
3935 )
3935 )
3936 index = self.index
3936 index = self.index
3937 for rev in self:
3937 for rev in self:
3938 entry = index[rev]
3938 entry = index[rev]
3939
3939
3940 # Some classes override linkrev to take filtered revs into
3940 # Some classes override linkrev to take filtered revs into
3941 # account. Use raw entry from index.
3941 # account. Use raw entry from index.
3942 flags = entry[0] & 0xFFFF
3942 flags = entry[0] & 0xFFFF
3943 linkrev = entry[4]
3943 linkrev = entry[4]
3944 p1 = index[entry[5]][7]
3944 p1 = index[entry[5]][7]
3945 p2 = index[entry[6]][7]
3945 p2 = index[entry[6]][7]
3946 node = entry[7]
3946 node = entry[7]
3947
3947
3948 # (Possibly) reuse the delta from the revlog if allowed and
3948 # (Possibly) reuse the delta from the revlog if allowed and
3949 # the revlog chunk is a delta.
3949 # the revlog chunk is a delta.
3950 cachedelta = None
3950 cachedelta = None
3951 rawtext = None
3951 rawtext = None
3952 if deltareuse == self.DELTAREUSEFULLADD:
3952 if deltareuse == self.DELTAREUSEFULLADD:
3953 text = self._revisiondata(rev)
3953 text = self._revisiondata(rev)
3954 sidedata = self.sidedata(rev)
3954 sidedata = self.sidedata(rev)
3955
3955
3956 if sidedata_helpers is not None:
3956 if sidedata_helpers is not None:
3957 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3957 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3958 self, sidedata_helpers, sidedata, rev
3958 self, sidedata_helpers, sidedata, rev
3959 )
3959 )
3960 flags = flags | new_flags[0] & ~new_flags[1]
3960 flags = flags | new_flags[0] & ~new_flags[1]
3961
3961
3962 destrevlog.addrevision(
3962 destrevlog.addrevision(
3963 text,
3963 text,
3964 tr,
3964 tr,
3965 linkrev,
3965 linkrev,
3966 p1,
3966 p1,
3967 p2,
3967 p2,
3968 cachedelta=cachedelta,
3968 cachedelta=cachedelta,
3969 node=node,
3969 node=node,
3970 flags=flags,
3970 flags=flags,
3971 deltacomputer=deltacomputer,
3971 deltacomputer=deltacomputer,
3972 sidedata=sidedata,
3972 sidedata=sidedata,
3973 )
3973 )
3974 else:
3974 else:
3975 if destrevlog.delta_config.lazy_delta:
3975 if destrevlog.delta_config.lazy_delta:
3976 dp = self.deltaparent(rev)
3976 dp = self.deltaparent(rev)
3977 if dp != nullrev:
3977 if dp != nullrev:
3978 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3978 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3979
3979
3980 sidedata = None
3980 sidedata = None
3981 if not cachedelta:
3981 if not cachedelta:
3982 try:
3982 try:
3983 rawtext = self._revisiondata(rev)
3983 rawtext = self._revisiondata(rev)
3984 except error.CensoredNodeError as censored:
3984 except error.CensoredNodeError as censored:
3985 assert flags & REVIDX_ISCENSORED
3985 assert flags & REVIDX_ISCENSORED
3986 rawtext = censored.tombstone
3986 rawtext = censored.tombstone
3987 sidedata = self.sidedata(rev)
3987 sidedata = self.sidedata(rev)
3988 if sidedata is None:
3988 if sidedata is None:
3989 sidedata = self.sidedata(rev)
3989 sidedata = self.sidedata(rev)
3990
3990
3991 if sidedata_helpers is not None:
3991 if sidedata_helpers is not None:
3992 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3992 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3993 self, sidedata_helpers, sidedata, rev
3993 self, sidedata_helpers, sidedata, rev
3994 )
3994 )
3995 flags = flags | new_flags[0] & ~new_flags[1]
3995 flags = flags | new_flags[0] & ~new_flags[1]
3996
3996
3997 destrevlog._addrevision(
3997 destrevlog._addrevision(
3998 node,
3998 node,
3999 rawtext,
3999 rawtext,
4000 tr,
4000 tr,
4001 linkrev,
4001 linkrev,
4002 p1,
4002 p1,
4003 p2,
4003 p2,
4004 flags,
4004 flags,
4005 cachedelta,
4005 cachedelta,
4006 deltacomputer=deltacomputer,
4006 deltacomputer=deltacomputer,
4007 sidedata=sidedata,
4007 sidedata=sidedata,
4008 )
4008 )
4009
4009
4010 if addrevisioncb:
4010 if addrevisioncb:
4011 addrevisioncb(self, rev, node)
4011 addrevisioncb(self, rev, node)
4012
4012
4013 def censorrevision(self, tr, censor_nodes, tombstone=b''):
4013 def censorrevision(self, tr, censor_nodes, tombstone=b''):
4014 if self._format_version == REVLOGV0:
4014 if self._format_version == REVLOGV0:
4015 raise error.RevlogError(
4015 raise error.RevlogError(
4016 _(b'cannot censor with version %d revlogs')
4016 _(b'cannot censor with version %d revlogs')
4017 % self._format_version
4017 % self._format_version
4018 )
4018 )
4019 elif self._format_version == REVLOGV1:
4019 elif self._format_version == REVLOGV1:
4020 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
4020 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
4021 else:
4021 else:
4022 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
4022 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
4023
4023
4024 def verifyintegrity(self, state) -> Iterable[revlogproblem]:
4024 def verifyintegrity(self, state) -> Iterable[repository.iverifyproblem]:
4025 """Verifies the integrity of the revlog.
4025 """Verifies the integrity of the revlog.
4026
4026
4027 Yields ``revlogproblem`` instances describing problems that are
4027 Yields ``revlogproblem`` instances describing problems that are
4028 found.
4028 found.
4029 """
4029 """
4030 dd, di = self.checksize()
4030 dd, di = self.checksize()
4031 if dd:
4031 if dd:
4032 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
4032 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
4033 if di:
4033 if di:
4034 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
4034 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
4035
4035
4036 version = self._format_version
4036 version = self._format_version
4037
4037
4038 # The verifier tells us what version revlog we should be.
4038 # The verifier tells us what version revlog we should be.
4039 if version != state[b'expectedversion']:
4039 if version != state[b'expectedversion']:
4040 yield revlogproblem(
4040 yield revlogproblem(
4041 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
4041 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
4042 % (self.display_id, version, state[b'expectedversion'])
4042 % (self.display_id, version, state[b'expectedversion'])
4043 )
4043 )
4044
4044
4045 state[b'skipread'] = set()
4045 state[b'skipread'] = set()
4046 state[b'safe_renamed'] = set()
4046 state[b'safe_renamed'] = set()
4047
4047
4048 for rev in self:
4048 for rev in self:
4049 node = self.node(rev)
4049 node = self.node(rev)
4050
4050
4051 # Verify contents. 4 cases to care about:
4051 # Verify contents. 4 cases to care about:
4052 #
4052 #
4053 # common: the most common case
4053 # common: the most common case
4054 # rename: with a rename
4054 # rename: with a rename
4055 # meta: file content starts with b'\1\n', the metadata
4055 # meta: file content starts with b'\1\n', the metadata
4056 # header defined in filelog.py, but without a rename
4056 # header defined in filelog.py, but without a rename
4057 # ext: content stored externally
4057 # ext: content stored externally
4058 #
4058 #
4059 # More formally, their differences are shown below:
4059 # More formally, their differences are shown below:
4060 #
4060 #
4061 # | common | rename | meta | ext
4061 # | common | rename | meta | ext
4062 # -------------------------------------------------------
4062 # -------------------------------------------------------
4063 # flags() | 0 | 0 | 0 | not 0
4063 # flags() | 0 | 0 | 0 | not 0
4064 # renamed() | False | True | False | ?
4064 # renamed() | False | True | False | ?
4065 # rawtext[0:2]=='\1\n'| False | True | True | ?
4065 # rawtext[0:2]=='\1\n'| False | True | True | ?
4066 #
4066 #
4067 # "rawtext" means the raw text stored in revlog data, which
4067 # "rawtext" means the raw text stored in revlog data, which
4068 # could be retrieved by "rawdata(rev)". "text"
4068 # could be retrieved by "rawdata(rev)". "text"
4069 # mentioned below is "revision(rev)".
4069 # mentioned below is "revision(rev)".
4070 #
4070 #
4071 # There are 3 different lengths stored physically:
4071 # There are 3 different lengths stored physically:
4072 # 1. L1: rawsize, stored in revlog index
4072 # 1. L1: rawsize, stored in revlog index
4073 # 2. L2: len(rawtext), stored in revlog data
4073 # 2. L2: len(rawtext), stored in revlog data
4074 # 3. L3: len(text), stored in revlog data if flags==0, or
4074 # 3. L3: len(text), stored in revlog data if flags==0, or
4075 # possibly somewhere else if flags!=0
4075 # possibly somewhere else if flags!=0
4076 #
4076 #
4077 # L1 should be equal to L2. L3 could be different from them.
4077 # L1 should be equal to L2. L3 could be different from them.
4078 # "text" may or may not affect commit hash depending on flag
4078 # "text" may or may not affect commit hash depending on flag
4079 # processors (see flagutil.addflagprocessor).
4079 # processors (see flagutil.addflagprocessor).
4080 #
4080 #
4081 # | common | rename | meta | ext
4081 # | common | rename | meta | ext
4082 # -------------------------------------------------
4082 # -------------------------------------------------
4083 # rawsize() | L1 | L1 | L1 | L1
4083 # rawsize() | L1 | L1 | L1 | L1
4084 # size() | L1 | L2-LM | L1(*) | L1 (?)
4084 # size() | L1 | L2-LM | L1(*) | L1 (?)
4085 # len(rawtext) | L2 | L2 | L2 | L2
4085 # len(rawtext) | L2 | L2 | L2 | L2
4086 # len(text) | L2 | L2 | L2 | L3
4086 # len(text) | L2 | L2 | L2 | L3
4087 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
4087 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
4088 #
4088 #
4089 # LM: length of metadata, depending on rawtext
4089 # LM: length of metadata, depending on rawtext
4090 # (*): not ideal, see comment in filelog.size
4090 # (*): not ideal, see comment in filelog.size
4091 # (?): could be "- len(meta)" if the resolved content has
4091 # (?): could be "- len(meta)" if the resolved content has
4092 # rename metadata
4092 # rename metadata
4093 #
4093 #
4094 # Checks needed to be done:
4094 # Checks needed to be done:
4095 # 1. length check: L1 == L2, in all cases.
4095 # 1. length check: L1 == L2, in all cases.
4096 # 2. hash check: depending on flag processor, we may need to
4096 # 2. hash check: depending on flag processor, we may need to
4097 # use either "text" (external), or "rawtext" (in revlog).
4097 # use either "text" (external), or "rawtext" (in revlog).
4098
4098
4099 try:
4099 try:
4100 skipflags = state.get(b'skipflags', 0)
4100 skipflags = state.get(b'skipflags', 0)
4101 if skipflags:
4101 if skipflags:
4102 skipflags &= self.flags(rev)
4102 skipflags &= self.flags(rev)
4103
4103
4104 _verify_revision(self, skipflags, state, node)
4104 _verify_revision(self, skipflags, state, node)
4105
4105
4106 l1 = self.rawsize(rev)
4106 l1 = self.rawsize(rev)
4107 l2 = len(self.rawdata(node))
4107 l2 = len(self.rawdata(node))
4108
4108
4109 if l1 != l2:
4109 if l1 != l2:
4110 yield revlogproblem(
4110 yield revlogproblem(
4111 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
4111 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
4112 node=node,
4112 node=node,
4113 )
4113 )
4114
4114
4115 except error.CensoredNodeError:
4115 except error.CensoredNodeError:
4116 if state[b'erroroncensored']:
4116 if state[b'erroroncensored']:
4117 yield revlogproblem(
4117 yield revlogproblem(
4118 error=_(b'censored file data'), node=node
4118 error=_(b'censored file data'), node=node
4119 )
4119 )
4120 state[b'skipread'].add(node)
4120 state[b'skipread'].add(node)
4121 except Exception as e:
4121 except Exception as e:
4122 yield revlogproblem(
4122 yield revlogproblem(
4123 error=_(b'unpacking %s: %s')
4123 error=_(b'unpacking %s: %s')
4124 % (short(node), stringutil.forcebytestr(e)),
4124 % (short(node), stringutil.forcebytestr(e)),
4125 node=node,
4125 node=node,
4126 )
4126 )
4127 state[b'skipread'].add(node)
4127 state[b'skipread'].add(node)
4128
4128
4129 def storageinfo(
4129 def storageinfo(
4130 self,
4130 self,
4131 exclusivefiles=False,
4131 exclusivefiles=False,
4132 sharedfiles=False,
4132 sharedfiles=False,
4133 revisionscount=False,
4133 revisionscount=False,
4134 trackedsize=False,
4134 trackedsize=False,
4135 storedsize=False,
4135 storedsize=False,
4136 ):
4136 ):
4137 d = {}
4137 d = {}
4138
4138
4139 if exclusivefiles:
4139 if exclusivefiles:
4140 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4140 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4141 if not self._inline:
4141 if not self._inline:
4142 d[b'exclusivefiles'].append((self.opener, self._datafile))
4142 d[b'exclusivefiles'].append((self.opener, self._datafile))
4143
4143
4144 if sharedfiles:
4144 if sharedfiles:
4145 d[b'sharedfiles'] = []
4145 d[b'sharedfiles'] = []
4146
4146
4147 if revisionscount:
4147 if revisionscount:
4148 d[b'revisionscount'] = len(self)
4148 d[b'revisionscount'] = len(self)
4149
4149
4150 if trackedsize:
4150 if trackedsize:
4151 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4151 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4152
4152
4153 if storedsize:
4153 if storedsize:
4154 d[b'storedsize'] = sum(
4154 d[b'storedsize'] = sum(
4155 self.opener.stat(path).st_size for path in self.files()
4155 self.opener.stat(path).st_size for path in self.files()
4156 )
4156 )
4157
4157
4158 return d
4158 return d
4159
4159
4160 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4160 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4161 if not self.feature_config.has_side_data:
4161 if not self.feature_config.has_side_data:
4162 return
4162 return
4163 # revlog formats with sidedata support do not support inline
4163 # revlog formats with sidedata support do not support inline
4164 assert not self._inline
4164 assert not self._inline
4165 if not helpers[1] and not helpers[2]:
4165 if not helpers[1] and not helpers[2]:
4166 # Nothing to generate or remove
4166 # Nothing to generate or remove
4167 return
4167 return
4168
4168
4169 new_entries = []
4169 new_entries = []
4170 # append the new sidedata
4170 # append the new sidedata
4171 with self._writing(transaction):
4171 with self._writing(transaction):
4172 ifh, dfh, sdfh = self._inner._writinghandles
4172 ifh, dfh, sdfh = self._inner._writinghandles
4173 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4173 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4174
4174
4175 current_offset = sdfh.tell()
4175 current_offset = sdfh.tell()
4176 for rev in range(startrev, endrev + 1):
4176 for rev in range(startrev, endrev + 1):
4177 entry = self.index[rev]
4177 entry = self.index[rev]
4178 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4178 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4179 store=self,
4179 store=self,
4180 sidedata_helpers=helpers,
4180 sidedata_helpers=helpers,
4181 sidedata={},
4181 sidedata={},
4182 rev=rev,
4182 rev=rev,
4183 )
4183 )
4184
4184
4185 serialized_sidedata = sidedatautil.serialize_sidedata(
4185 serialized_sidedata = sidedatautil.serialize_sidedata(
4186 new_sidedata
4186 new_sidedata
4187 )
4187 )
4188
4188
4189 sidedata_compression_mode = COMP_MODE_INLINE
4189 sidedata_compression_mode = COMP_MODE_INLINE
4190 if serialized_sidedata and self.feature_config.has_side_data:
4190 if serialized_sidedata and self.feature_config.has_side_data:
4191 sidedata_compression_mode = COMP_MODE_PLAIN
4191 sidedata_compression_mode = COMP_MODE_PLAIN
4192 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4192 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4193 if (
4193 if (
4194 h != b'u'
4194 h != b'u'
4195 and comp_sidedata[0] != b'\0'
4195 and comp_sidedata[0] != b'\0'
4196 and len(comp_sidedata) < len(serialized_sidedata)
4196 and len(comp_sidedata) < len(serialized_sidedata)
4197 ):
4197 ):
4198 assert not h
4198 assert not h
4199 if (
4199 if (
4200 comp_sidedata[0]
4200 comp_sidedata[0]
4201 == self._docket.default_compression_header
4201 == self._docket.default_compression_header
4202 ):
4202 ):
4203 sidedata_compression_mode = COMP_MODE_DEFAULT
4203 sidedata_compression_mode = COMP_MODE_DEFAULT
4204 serialized_sidedata = comp_sidedata
4204 serialized_sidedata = comp_sidedata
4205 else:
4205 else:
4206 sidedata_compression_mode = COMP_MODE_INLINE
4206 sidedata_compression_mode = COMP_MODE_INLINE
4207 serialized_sidedata = comp_sidedata
4207 serialized_sidedata = comp_sidedata
4208 if entry[8] != 0 or entry[9] != 0:
4208 if entry[8] != 0 or entry[9] != 0:
4209 # rewriting entries that already have sidedata is not
4209 # rewriting entries that already have sidedata is not
4210 # supported yet, because it introduces garbage data in the
4210 # supported yet, because it introduces garbage data in the
4211 # revlog.
4211 # revlog.
4212 msg = b"rewriting existing sidedata is not supported yet"
4212 msg = b"rewriting existing sidedata is not supported yet"
4213 raise error.Abort(msg)
4213 raise error.Abort(msg)
4214
4214
4215 # Apply (potential) flags to add and to remove after running
4215 # Apply (potential) flags to add and to remove after running
4216 # the sidedata helpers
4216 # the sidedata helpers
4217 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4217 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4218 entry_update = (
4218 entry_update = (
4219 current_offset,
4219 current_offset,
4220 len(serialized_sidedata),
4220 len(serialized_sidedata),
4221 new_offset_flags,
4221 new_offset_flags,
4222 sidedata_compression_mode,
4222 sidedata_compression_mode,
4223 )
4223 )
4224
4224
4225 # the sidedata computation might have moved the file cursors around
4225 # the sidedata computation might have moved the file cursors around
4226 sdfh.seek(current_offset, os.SEEK_SET)
4226 sdfh.seek(current_offset, os.SEEK_SET)
4227 sdfh.write(serialized_sidedata)
4227 sdfh.write(serialized_sidedata)
4228 new_entries.append(entry_update)
4228 new_entries.append(entry_update)
4229 current_offset += len(serialized_sidedata)
4229 current_offset += len(serialized_sidedata)
4230 self._docket.sidedata_end = sdfh.tell()
4230 self._docket.sidedata_end = sdfh.tell()
4231
4231
4232 # rewrite the new index entries
4232 # rewrite the new index entries
4233 ifh.seek(startrev * self.index.entry_size)
4233 ifh.seek(startrev * self.index.entry_size)
4234 for i, e in enumerate(new_entries):
4234 for i, e in enumerate(new_entries):
4235 rev = startrev + i
4235 rev = startrev + i
4236 self.index.replace_sidedata_info(
4236 self.index.replace_sidedata_info(
4237 rev, *e
4237 rev, *e
4238 ) # pytype: disable=attribute-error
4238 ) # pytype: disable=attribute-error
4239 packed = self.index.entry_binary(rev)
4239 packed = self.index.entry_binary(rev)
4240 if rev == 0 and self._docket is None:
4240 if rev == 0 and self._docket is None:
4241 header = self._format_flags | self._format_version
4241 header = self._format_flags | self._format_version
4242 header = self.index.pack_header(header)
4242 header = self.index.pack_header(header)
4243 packed = header + packed
4243 packed = header + packed
4244 ifh.write(packed)
4244 ifh.write(packed)
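
Note on the revlog.py hunk above: within the portion shown here, the only change is the return annotation of `verifyintegrity`, which now references the `repository.iverifyproblem` Protocol instead of the concrete `revlogproblem` class; the yielded objects themselves are unchanged. As a rough, hedged illustration (not part of this change), a caller could consume those problem objects along the following lines — the `ui`, `path`, `fl`, and `state` names are assumptions for the sketch, not code from this diff:

from mercurial.node import short


def report_problems(ui, path, fl, state):
    # Minimal consumer sketch: each yielded problem exposes optional
    # `warning`, `error`, and `node` byte-string attributes, per the
    # repository.iverifyproblem Protocol.
    for problem in fl.verifyintegrity(state):
        if problem.node is not None:
            prefix = b'%s@%s' % (path, short(problem.node))
        else:
            prefix = path
        if problem.warning:
            ui.warn(b'%s: %s\n' % (prefix, problem.warning))
        elif problem.error:
            ui.warn(b'%s: %s\n' % (prefix, problem.error))
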
@@ -1,744 +1,747
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # To use this with the test suite:
8 # To use this with the test suite:
9 #
9 #
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12
12
13
13
14 import stat
14 import stat
15
15
16 from typing import (
17 Optional,
18 )
19
16 from mercurial.i18n import _
20 from mercurial.i18n import _
17 from mercurial.node import (
21 from mercurial.node import (
18 bin,
22 bin,
19 hex,
23 hex,
20 nullrev,
24 nullrev,
21 )
25 )
22 from mercurial.thirdparty import attr
26 from mercurial.thirdparty import attr
23 from mercurial import (
27 from mercurial import (
24 ancestor,
28 ancestor,
25 bundlerepo,
29 bundlerepo,
26 error,
30 error,
27 extensions,
31 extensions,
28 localrepo,
32 localrepo,
29 mdiff,
33 mdiff,
30 pycompat,
34 pycompat,
31 revlog,
35 revlog,
32 store,
36 store,
33 verify,
37 verify,
34 )
38 )
35 from mercurial.interfaces import (
39 from mercurial.interfaces import (
36 repository,
40 repository,
37 util as interfaceutil,
41 util as interfaceutil,
38 )
42 )
39 from mercurial.utils import (
43 from mercurial.utils import (
40 cborutil,
44 cborutil,
41 storageutil,
45 storageutil,
42 )
46 )
43 from mercurial.revlogutils import flagutil
47 from mercurial.revlogutils import flagutil
44
48
45 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
49 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
46 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
50 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
47 # be specifying the version(s) of Mercurial they are tested with, or
51 # be specifying the version(s) of Mercurial they are tested with, or
48 # leave the attribute unspecified.
52 # leave the attribute unspecified.
49 testedwith = b'ships-with-hg-core'
53 testedwith = b'ships-with-hg-core'
50
54
51 REQUIREMENT = b'testonly-simplestore'
55 REQUIREMENT = b'testonly-simplestore'
52
56
53
57
54 def validatenode(node):
58 def validatenode(node):
55 if isinstance(node, int):
59 if isinstance(node, int):
56 raise ValueError('expected node; got int')
60 raise ValueError('expected node; got int')
57
61
58 if len(node) != 20:
62 if len(node) != 20:
59 raise ValueError('expected 20 byte node')
63 raise ValueError('expected 20 byte node')
60
64
61
65
62 def validaterev(rev):
66 def validaterev(rev):
63 if not isinstance(rev, int):
67 if not isinstance(rev, int):
64 raise ValueError('expected int')
68 raise ValueError('expected int')
65
69
66
70
67 class simplestoreerror(error.StorageError):
71 class simplestoreerror(error.StorageError):
68 pass
72 pass
69
73
70
74
71 @interfaceutil.implementer(repository.irevisiondelta)
75 @interfaceutil.implementer(repository.irevisiondelta)
72 @attr.s(slots=True)
76 @attr.s(slots=True)
73 class simplestorerevisiondelta:
77 class simplestorerevisiondelta:
74 node = attr.ib()
78 node = attr.ib()
75 p1node = attr.ib()
79 p1node = attr.ib()
76 p2node = attr.ib()
80 p2node = attr.ib()
77 basenode = attr.ib()
81 basenode = attr.ib()
78 flags = attr.ib()
82 flags = attr.ib()
79 baserevisionsize = attr.ib()
83 baserevisionsize = attr.ib()
80 revision = attr.ib()
84 revision = attr.ib()
81 delta = attr.ib()
85 delta = attr.ib()
82 linknode = attr.ib(default=None)
86 linknode = attr.ib(default=None)
83
87
84
88
85 @interfaceutil.implementer(repository.iverifyproblem)
86 @attr.s(frozen=True)
89 @attr.s(frozen=True)
87 class simplefilestoreproblem:
90 class simplefilestoreproblem(repository.iverifyproblem):
88 warning = attr.ib(default=None)
91 warning = attr.ib(default=None, type=Optional[bytes])
89 error = attr.ib(default=None)
92 error = attr.ib(default=None, type=Optional[bytes])
90 node = attr.ib(default=None)
93 node = attr.ib(default=None, type=Optional[bytes])
91
94
92
95
93 @interfaceutil.implementer(repository.ifilestorage)
96 @interfaceutil.implementer(repository.ifilestorage)
94 class filestorage:
97 class filestorage:
95 """Implements storage for a tracked path.
98 """Implements storage for a tracked path.
96
99
97 Data is stored in the VFS in a directory corresponding to the tracked
100 Data is stored in the VFS in a directory corresponding to the tracked
98 path.
101 path.
99
102
100 Index data is stored in an ``index`` file using CBOR.
103 Index data is stored in an ``index`` file using CBOR.
101
104
102 Fulltext data is stored in files having names of the node.
105 Fulltext data is stored in files having names of the node.
103 """
106 """
104
107
105 _flagserrorclass = simplestoreerror
108 _flagserrorclass = simplestoreerror
106
109
107 def __init__(self, repo, svfs, path):
110 def __init__(self, repo, svfs, path):
108 self.nullid = repo.nullid
111 self.nullid = repo.nullid
109 self._repo = repo
112 self._repo = repo
110 self._svfs = svfs
113 self._svfs = svfs
111 self._path = path
114 self._path = path
112
115
113 self._storepath = b'/'.join([b'data', path])
116 self._storepath = b'/'.join([b'data', path])
114 self._indexpath = b'/'.join([self._storepath, b'index'])
117 self._indexpath = b'/'.join([self._storepath, b'index'])
115
118
116 indexdata = self._svfs.tryread(self._indexpath)
119 indexdata = self._svfs.tryread(self._indexpath)
117 if indexdata:
120 if indexdata:
118 indexdata = cborutil.decodeall(indexdata)
121 indexdata = cborutil.decodeall(indexdata)
119
122
120 self._indexdata = indexdata or []
123 self._indexdata = indexdata or []
121 self._indexbynode = {}
124 self._indexbynode = {}
122 self._indexbyrev = {}
125 self._indexbyrev = {}
123 self._index = []
126 self._index = []
124 self._refreshindex()
127 self._refreshindex()
125
128
126 self._flagprocessors = dict(flagutil.flagprocessors)
129 self._flagprocessors = dict(flagutil.flagprocessors)
127
130
128 def _refreshindex(self):
131 def _refreshindex(self):
129 self._indexbynode.clear()
132 self._indexbynode.clear()
130 self._indexbyrev.clear()
133 self._indexbyrev.clear()
131 self._index = []
134 self._index = []
132
135
133 for i, entry in enumerate(self._indexdata):
136 for i, entry in enumerate(self._indexdata):
134 self._indexbynode[entry[b'node']] = entry
137 self._indexbynode[entry[b'node']] = entry
135 self._indexbyrev[i] = entry
138 self._indexbyrev[i] = entry
136
139
137 self._indexbynode[self._repo.nullid] = {
140 self._indexbynode[self._repo.nullid] = {
138 b'node': self._repo.nullid,
141 b'node': self._repo.nullid,
139 b'p1': self._repo.nullid,
142 b'p1': self._repo.nullid,
140 b'p2': self._repo.nullid,
143 b'p2': self._repo.nullid,
141 b'linkrev': nullrev,
144 b'linkrev': nullrev,
142 b'flags': 0,
145 b'flags': 0,
143 }
146 }
144
147
145 self._indexbyrev[nullrev] = {
148 self._indexbyrev[nullrev] = {
146 b'node': self._repo.nullid,
149 b'node': self._repo.nullid,
147 b'p1': self._repo.nullid,
150 b'p1': self._repo.nullid,
148 b'p2': self._repo.nullid,
151 b'p2': self._repo.nullid,
149 b'linkrev': nullrev,
152 b'linkrev': nullrev,
150 b'flags': 0,
153 b'flags': 0,
151 }
154 }
152
155
153 for i, entry in enumerate(self._indexdata):
156 for i, entry in enumerate(self._indexdata):
154 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
157 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
155
158
156 # start, length, rawsize, chainbase, linkrev, p1, p2, node
159 # start, length, rawsize, chainbase, linkrev, p1, p2, node
157 self._index.append(
160 self._index.append(
158 (0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, entry[b'node'])
161 (0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, entry[b'node'])
159 )
162 )
160
163
161 self._index.append((0, 0, 0, -1, -1, -1, -1, self._repo.nullid))
164 self._index.append((0, 0, 0, -1, -1, -1, -1, self._repo.nullid))
162
165
163 def __len__(self):
166 def __len__(self):
164 return len(self._indexdata)
167 return len(self._indexdata)
165
168
166 def __iter__(self):
169 def __iter__(self):
167 return iter(range(len(self)))
170 return iter(range(len(self)))
168
171
169 def revs(self, start=0, stop=None):
172 def revs(self, start=0, stop=None):
170 step = 1
173 step = 1
171 if stop is not None:
174 if stop is not None:
172 if start > stop:
175 if start > stop:
173 step = -1
176 step = -1
174
177
175 stop += step
178 stop += step
176 else:
179 else:
177 stop = len(self)
180 stop = len(self)
178
181
179 return range(start, stop, step)
182 return range(start, stop, step)
180
183
181 def parents(self, node):
184 def parents(self, node):
182 validatenode(node)
185 validatenode(node)
183
186
184 if node not in self._indexbynode:
187 if node not in self._indexbynode:
185 raise KeyError('unknown node')
188 raise KeyError('unknown node')
186
189
187 entry = self._indexbynode[node]
190 entry = self._indexbynode[node]
188
191
189 return entry[b'p1'], entry[b'p2']
192 return entry[b'p1'], entry[b'p2']
190
193
191 def parentrevs(self, rev):
194 def parentrevs(self, rev):
192 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
195 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
193 return self.rev(p1), self.rev(p2)
196 return self.rev(p1), self.rev(p2)
194
197
195 def rev(self, node):
198 def rev(self, node):
196 validatenode(node)
199 validatenode(node)
197
200
198 try:
201 try:
199 self._indexbynode[node]
202 self._indexbynode[node]
200 except KeyError:
203 except KeyError:
201 raise error.LookupError(node, self._indexpath, _('no node'))
204 raise error.LookupError(node, self._indexpath, _('no node'))
202
205
203 for rev, entry in self._indexbyrev.items():
206 for rev, entry in self._indexbyrev.items():
204 if entry[b'node'] == node:
207 if entry[b'node'] == node:
205 return rev
208 return rev
206
209
207 raise error.ProgrammingError(b'this should not occur')
210 raise error.ProgrammingError(b'this should not occur')
208
211
209 def node(self, rev):
212 def node(self, rev):
210 validaterev(rev)
213 validaterev(rev)
211
214
212 return self._indexbyrev[rev][b'node']
215 return self._indexbyrev[rev][b'node']
213
216
214 def hasnode(self, node):
217 def hasnode(self, node):
215 validatenode(node)
218 validatenode(node)
216 return node in self._indexbynode
219 return node in self._indexbynode
217
220
218 def censorrevision(self, tr, censornode, tombstone=b''):
221 def censorrevision(self, tr, censornode, tombstone=b''):
219 raise NotImplementedError('TODO')
222 raise NotImplementedError('TODO')
220
223
221 def lookup(self, node):
224 def lookup(self, node):
222 if isinstance(node, int):
225 if isinstance(node, int):
223 return self.node(node)
226 return self.node(node)
224
227
225 if len(node) == 20:
228 if len(node) == 20:
226 self.rev(node)
229 self.rev(node)
227 return node
230 return node
228
231
229 try:
232 try:
230 rev = int(node)
233 rev = int(node)
231 if '%d' % rev != node:
234 if '%d' % rev != node:
232 raise ValueError
235 raise ValueError
233
236
234 if rev < 0:
237 if rev < 0:
235 rev = len(self) + rev
238 rev = len(self) + rev
236 if rev < 0 or rev >= len(self):
239 if rev < 0 or rev >= len(self):
237 raise ValueError
240 raise ValueError
238
241
239 return self.node(rev)
242 return self.node(rev)
240 except (ValueError, OverflowError):
243 except (ValueError, OverflowError):
241 pass
244 pass
242
245
243 if len(node) == 40:
246 if len(node) == 40:
244 try:
247 try:
245 rawnode = bin(node)
248 rawnode = bin(node)
246 self.rev(rawnode)
249 self.rev(rawnode)
247 return rawnode
250 return rawnode
248 except TypeError:
251 except TypeError:
249 pass
252 pass
250
253
251 raise error.LookupError(node, self._path, _('invalid lookup input'))
254 raise error.LookupError(node, self._path, _('invalid lookup input'))
252
255
253 def linkrev(self, rev):
256 def linkrev(self, rev):
254 validaterev(rev)
257 validaterev(rev)
255
258
256 return self._indexbyrev[rev][b'linkrev']
259 return self._indexbyrev[rev][b'linkrev']
257
260
258 def _flags(self, rev):
261 def _flags(self, rev):
259 validaterev(rev)
262 validaterev(rev)
260
263
261 return self._indexbyrev[rev][b'flags']
264 return self._indexbyrev[rev][b'flags']
262
265
263 def _candelta(self, baserev, rev):
266 def _candelta(self, baserev, rev):
264 validaterev(baserev)
267 validaterev(baserev)
265 validaterev(rev)
268 validaterev(rev)
266
269
267 if (self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS) or (
270 if (self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS) or (
268 self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS
271 self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS
269 ):
272 ):
270 return False
273 return False
271
274
272 return True
275 return True
273
276
274 def checkhash(self, text, node, p1=None, p2=None, rev=None):
277 def checkhash(self, text, node, p1=None, p2=None, rev=None):
275 if p1 is None and p2 is None:
278 if p1 is None and p2 is None:
276 p1, p2 = self.parents(node)
279 p1, p2 = self.parents(node)
277 if node != storageutil.hashrevisionsha1(text, p1, p2):
280 if node != storageutil.hashrevisionsha1(text, p1, p2):
278 raise simplestoreerror(
281 raise simplestoreerror(
279 _("integrity check failed on %s") % self._path
282 _("integrity check failed on %s") % self._path
280 )
283 )
281
284
282 def revision(self, nodeorrev, raw=False):
285 def revision(self, nodeorrev, raw=False):
283 if isinstance(nodeorrev, int):
286 if isinstance(nodeorrev, int):
284 node = self.node(nodeorrev)
287 node = self.node(nodeorrev)
285 else:
288 else:
286 node = nodeorrev
289 node = nodeorrev
287 validatenode(node)
290 validatenode(node)
288
291
289 if node == self._repo.nullid:
292 if node == self._repo.nullid:
290 return b''
293 return b''
291
294
292 rev = self.rev(node)
295 rev = self.rev(node)
293 flags = self._flags(rev)
296 flags = self._flags(rev)
294
297
295 path = b'/'.join([self._storepath, hex(node)])
298 path = b'/'.join([self._storepath, hex(node)])
296 rawtext = self._svfs.read(path)
299 rawtext = self._svfs.read(path)
297
300
298 if raw:
301 if raw:
299 validatehash = flagutil.processflagsraw(self, rawtext, flags)
302 validatehash = flagutil.processflagsraw(self, rawtext, flags)
300 text = rawtext
303 text = rawtext
301 else:
304 else:
302 r = flagutil.processflagsread(self, rawtext, flags)
305 r = flagutil.processflagsread(self, rawtext, flags)
303 text, validatehash = r
306 text, validatehash = r
304 if validatehash:
307 if validatehash:
305 self.checkhash(text, node, rev=rev)
308 self.checkhash(text, node, rev=rev)
306
309
307 return text
310 return text
308
311
309 def rawdata(self, nodeorrev):
312 def rawdata(self, nodeorrev):
310 return self.revision(nodeorrev, raw=True)
313 return self.revision(nodeorrev, raw=True)
311
314
312 def read(self, node):
315 def read(self, node):
313 validatenode(node)
316 validatenode(node)
314
317
315 revision = self.revision(node)
318 revision = self.revision(node)
316
319
317 if not revision.startswith(b'\1\n'):
320 if not revision.startswith(b'\1\n'):
318 return revision
321 return revision
319
322
320 start = revision.index(b'\1\n', 2)
323 start = revision.index(b'\1\n', 2)
321 return revision[start + 2 :]
324 return revision[start + 2 :]
322
325
323 def renamed(self, node):
326 def renamed(self, node):
324 validatenode(node)
327 validatenode(node)
325
328
326 if self.parents(node)[0] != self._repo.nullid:
329 if self.parents(node)[0] != self._repo.nullid:
327 return False
330 return False
328
331
329 fulltext = self.revision(node)
332 fulltext = self.revision(node)
330 m = storageutil.parsemeta(fulltext)[0]
333 m = storageutil.parsemeta(fulltext)[0]
331
334
332 if m and 'copy' in m:
335 if m and 'copy' in m:
333 return m['copy'], bin(m['copyrev'])
336 return m['copy'], bin(m['copyrev'])
334
337
335 return False
338 return False
336
339
337 def cmp(self, node, text):
340 def cmp(self, node, text):
338 validatenode(node)
341 validatenode(node)
339
342
340 t = text
343 t = text
341
344
342 if text.startswith(b'\1\n'):
345 if text.startswith(b'\1\n'):
343 t = b'\1\n\1\n' + text
346 t = b'\1\n\1\n' + text
344
347
345 p1, p2 = self.parents(node)
348 p1, p2 = self.parents(node)
346
349
347 if storageutil.hashrevisionsha1(t, p1, p2) == node:
350 if storageutil.hashrevisionsha1(t, p1, p2) == node:
348 return False
351 return False
349
352
350 if self.iscensored(self.rev(node)):
353 if self.iscensored(self.rev(node)):
351 return text != b''
354 return text != b''
352
355
353 if self.renamed(node):
356 if self.renamed(node):
354 t2 = self.read(node)
357 t2 = self.read(node)
355 return t2 != text
358 return t2 != text
356
359
357 return True
360 return True
358
361
359 def size(self, rev):
362 def size(self, rev):
360 validaterev(rev)
363 validaterev(rev)
361
364
362 node = self._indexbyrev[rev][b'node']
365 node = self._indexbyrev[rev][b'node']
363
366
364 if self.renamed(node):
367 if self.renamed(node):
365 return len(self.read(node))
368 return len(self.read(node))
366
369
367 if self.iscensored(rev):
370 if self.iscensored(rev):
368 return 0
371 return 0
369
372
370 return len(self.revision(node))
373 return len(self.revision(node))
371
374
372 def iscensored(self, rev):
375 def iscensored(self, rev):
373 validaterev(rev)
376 validaterev(rev)
374
377
375 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
378 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
376
379
377 def commonancestorsheads(self, a, b):
380 def commonancestorsheads(self, a, b):
378 validatenode(a)
381 validatenode(a)
379 validatenode(b)
382 validatenode(b)
380
383
381 a = self.rev(a)
384 a = self.rev(a)
382 b = self.rev(b)
385 b = self.rev(b)
383
386
384 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
387 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
385 return pycompat.maplist(self.node, ancestors)
388 return pycompat.maplist(self.node, ancestors)
386
389
387 def descendants(self, revs):
390 def descendants(self, revs):
388 # This is a copy of revlog.descendants()
391 # This is a copy of revlog.descendants()
389 first = min(revs)
392 first = min(revs)
390 if first == nullrev:
393 if first == nullrev:
391 for i in self:
394 for i in self:
392 yield i
395 yield i
393 return
396 return
394
397
395 seen = set(revs)
398 seen = set(revs)
396 for i in self.revs(start=first + 1):
399 for i in self.revs(start=first + 1):
397 for x in self.parentrevs(i):
400 for x in self.parentrevs(i):
398 if x != nullrev and x in seen:
401 if x != nullrev and x in seen:
399 seen.add(i)
402 seen.add(i)
400 yield i
403 yield i
401 break
404 break
402
405
403 # Required by verify.
406 # Required by verify.
404 def files(self):
407 def files(self):
405 entries = self._svfs.listdir(self._storepath)
408 entries = self._svfs.listdir(self._storepath)
406
409
407 # Strip out undo.backup.* files created as part of transaction
410 # Strip out undo.backup.* files created as part of transaction
408 # recording.
411 # recording.
409 entries = [f for f in entries if not f.startswith('undo.backup.')]
412 entries = [f for f in entries if not f.startswith('undo.backup.')]
410
413
411 return [b'/'.join((self._storepath, f)) for f in entries]
414 return [b'/'.join((self._storepath, f)) for f in entries]
412
415
413 def storageinfo(
416 def storageinfo(
414 self,
417 self,
415 exclusivefiles=False,
418 exclusivefiles=False,
416 sharedfiles=False,
419 sharedfiles=False,
417 revisionscount=False,
420 revisionscount=False,
418 trackedsize=False,
421 trackedsize=False,
419 storedsize=False,
422 storedsize=False,
420 ):
423 ):
421 # TODO do a real implementation of this
424 # TODO do a real implementation of this
422 return {
425 return {
423 'exclusivefiles': [],
426 'exclusivefiles': [],
424 'sharedfiles': [],
427 'sharedfiles': [],
425 'revisionscount': len(self),
428 'revisionscount': len(self),
426 'trackedsize': 0,
429 'trackedsize': 0,
427 'storedsize': None,
430 'storedsize': None,
428 }
431 }
429
432
430 def verifyintegrity(self, state):
433 def verifyintegrity(self, state):
431 state['skipread'] = set()
434 state['skipread'] = set()
432 for rev in self:
435 for rev in self:
433 node = self.node(rev)
436 node = self.node(rev)
434 try:
437 try:
435 self.revision(node)
438 self.revision(node)
436 except Exception as e:
439 except Exception as e:
437 yield simplefilestoreproblem(
440 yield simplefilestoreproblem(
438 error='unpacking %s: %s' % (node, e), node=node
441 error='unpacking %s: %s' % (node, e), node=node
439 )
442 )
440 state['skipread'].add(node)
443 state['skipread'].add(node)
441
444
442 def emitrevisions(
445 def emitrevisions(
443 self,
446 self,
444 nodes,
447 nodes,
445 nodesorder=None,
448 nodesorder=None,
446 revisiondata=False,
449 revisiondata=False,
447 assumehaveparentrevisions=False,
450 assumehaveparentrevisions=False,
448 deltamode=repository.CG_DELTAMODE_STD,
451 deltamode=repository.CG_DELTAMODE_STD,
449 sidedata_helpers=None,
452 sidedata_helpers=None,
450 ):
453 ):
451 # TODO this will probably break on some ordering options.
454 # TODO this will probably break on some ordering options.
452 nodes = [n for n in nodes if n != self._repo.nullid]
455 nodes = [n for n in nodes if n != self._repo.nullid]
453 if not nodes:
456 if not nodes:
454 return
457 return
455 for delta in storageutil.emitrevisions(
458 for delta in storageutil.emitrevisions(
456 self,
459 self,
457 nodes,
460 nodes,
458 nodesorder,
461 nodesorder,
459 simplestorerevisiondelta,
462 simplestorerevisiondelta,
460 revisiondata=revisiondata,
463 revisiondata=revisiondata,
461 assumehaveparentrevisions=assumehaveparentrevisions,
464 assumehaveparentrevisions=assumehaveparentrevisions,
462 deltamode=deltamode,
465 deltamode=deltamode,
463 sidedata_helpers=sidedata_helpers,
466 sidedata_helpers=sidedata_helpers,
464 ):
467 ):
465 yield delta
468 yield delta
466
469
467 def add(self, text, meta, transaction, linkrev, p1, p2):
470 def add(self, text, meta, transaction, linkrev, p1, p2):
468 if meta or text.startswith(b'\1\n'):
471 if meta or text.startswith(b'\1\n'):
469 text = storageutil.packmeta(meta, text)
472 text = storageutil.packmeta(meta, text)
470
473
471 return self.addrevision(text, transaction, linkrev, p1, p2)
474 return self.addrevision(text, transaction, linkrev, p1, p2)
472
475
473 def addrevision(
476 def addrevision(
474 self,
477 self,
475 text,
478 text,
476 transaction,
479 transaction,
477 linkrev,
480 linkrev,
478 p1,
481 p1,
479 p2,
482 p2,
480 node=None,
483 node=None,
481 flags=revlog.REVIDX_DEFAULT_FLAGS,
484 flags=revlog.REVIDX_DEFAULT_FLAGS,
482 cachedelta=None,
485 cachedelta=None,
483 ):
486 ):
484 validatenode(p1)
487 validatenode(p1)
485 validatenode(p2)
488 validatenode(p2)
486
489
487 if flags:
490 if flags:
488 node = node or storageutil.hashrevisionsha1(text, p1, p2)
491 node = node or storageutil.hashrevisionsha1(text, p1, p2)
489
492
490 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
493 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
491
494
492 node = node or storageutil.hashrevisionsha1(text, p1, p2)
495 node = node or storageutil.hashrevisionsha1(text, p1, p2)
493
496
494 if node in self._indexbynode:
497 if node in self._indexbynode:
495 return node
498 return node
496
499
497 if validatehash:
500 if validatehash:
498 self.checkhash(rawtext, node, p1=p1, p2=p2)
501 self.checkhash(rawtext, node, p1=p1, p2=p2)
499
502
500 return self._addrawrevision(
503 return self._addrawrevision(
501 node, rawtext, transaction, linkrev, p1, p2, flags
504 node, rawtext, transaction, linkrev, p1, p2, flags
502 )
505 )
503
506
504 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
507 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
505 transaction.addbackup(self._indexpath)
508 transaction.addbackup(self._indexpath)
506
509
507 path = b'/'.join([self._storepath, hex(node)])
510 path = b'/'.join([self._storepath, hex(node)])
508
511
509 self._svfs.write(path, rawtext)
512 self._svfs.write(path, rawtext)
510
513
511 self._indexdata.append(
514 self._indexdata.append(
512 {
515 {
513 b'node': node,
516 b'node': node,
514 b'p1': p1,
517 b'p1': p1,
515 b'p2': p2,
518 b'p2': p2,
516 b'linkrev': link,
519 b'linkrev': link,
517 b'flags': flags,
520 b'flags': flags,
518 }
521 }
519 )
522 )
520
523
521 self._reflectindexupdate()
524 self._reflectindexupdate()
522
525
523 return node
526 return node
524
527
525 def _reflectindexupdate(self):
528 def _reflectindexupdate(self):
526 self._refreshindex()
529 self._refreshindex()
527 self._svfs.write(
530 self._svfs.write(
528 self._indexpath, ''.join(cborutil.streamencode(self._indexdata))
531 self._indexpath, ''.join(cborutil.streamencode(self._indexdata))
529 )
532 )
530
533
531 def addgroup(
534 def addgroup(
532 self,
535 self,
533 deltas,
536 deltas,
534 linkmapper,
537 linkmapper,
535 transaction,
538 transaction,
536 addrevisioncb=None,
539 addrevisioncb=None,
537 duplicaterevisioncb=None,
540 duplicaterevisioncb=None,
538 maybemissingparents=False,
541 maybemissingparents=False,
539 ):
542 ):
540 if maybemissingparents:
543 if maybemissingparents:
541 raise error.Abort(
544 raise error.Abort(
542 _('simple store does not support missing parents ' 'write mode')
545 _('simple store does not support missing parents ' 'write mode')
543 )
546 )
544
547
545 empty = True
548 empty = True
546
549
547 transaction.addbackup(self._indexpath)
550 transaction.addbackup(self._indexpath)
548
551
549 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
552 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
550 linkrev = linkmapper(linknode)
553 linkrev = linkmapper(linknode)
551 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
554 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
552
555
553 if node in self._indexbynode:
556 if node in self._indexbynode:
554 if duplicaterevisioncb:
557 if duplicaterevisioncb:
555 duplicaterevisioncb(self, self.rev(node))
558 duplicaterevisioncb(self, self.rev(node))
556 empty = False
559 empty = False
557 continue
560 continue
558
561
559 # Need to resolve the fulltext from the delta base.
562 # Need to resolve the fulltext from the delta base.
560 if deltabase == self._repo.nullid:
563 if deltabase == self._repo.nullid:
561 text = mdiff.patch(b'', delta)
564 text = mdiff.patch(b'', delta)
562 else:
565 else:
563 text = mdiff.patch(self.revision(deltabase), delta)
566 text = mdiff.patch(self.revision(deltabase), delta)
564
567
565 rev = self._addrawrevision(
568 rev = self._addrawrevision(
566 node, text, transaction, linkrev, p1, p2, flags
569 node, text, transaction, linkrev, p1, p2, flags
567 )
570 )
568
571
569 if addrevisioncb:
572 if addrevisioncb:
570 addrevisioncb(self, rev)
573 addrevisioncb(self, rev)
571 empty = False
574 empty = False
572 return not empty
575 return not empty
573
576
574 def _headrevs(self):
577 def _headrevs(self):
575 # Assume all revisions are heads by default.
578 # Assume all revisions are heads by default.
576 revishead = {rev: True for rev in self._indexbyrev}
579 revishead = {rev: True for rev in self._indexbyrev}
577
580
578 for rev, entry in self._indexbyrev.items():
581 for rev, entry in self._indexbyrev.items():
579 # Unset head flag for all seen parents.
582 # Unset head flag for all seen parents.
580 revishead[self.rev(entry[b'p1'])] = False
583 revishead[self.rev(entry[b'p1'])] = False
581 revishead[self.rev(entry[b'p2'])] = False
584 revishead[self.rev(entry[b'p2'])] = False
582
585
583 return [rev for rev, ishead in sorted(revishead.items()) if ishead]
586 return [rev for rev, ishead in sorted(revishead.items()) if ishead]
584
587
585 def heads(self, start=None, stop=None):
588 def heads(self, start=None, stop=None):
586 # This is copied from revlog.py.
589 # This is copied from revlog.py.
587 if start is None and stop is None:
590 if start is None and stop is None:
588 if not len(self):
591 if not len(self):
589 return [self._repo.nullid]
592 return [self._repo.nullid]
590 return [self.node(r) for r in self._headrevs()]
593 return [self.node(r) for r in self._headrevs()]
591
594
592 if start is None:
595 if start is None:
593 start = self._repo.nullid
596 start = self._repo.nullid
594 if stop is None:
597 if stop is None:
595 stop = []
598 stop = []
596 stoprevs = {self.rev(n) for n in stop}
599 stoprevs = {self.rev(n) for n in stop}
597 startrev = self.rev(start)
600 startrev = self.rev(start)
598 reachable = {startrev}
601 reachable = {startrev}
599 heads = {startrev}
602 heads = {startrev}
600
603
601 parentrevs = self.parentrevs
604 parentrevs = self.parentrevs
602 for r in self.revs(start=startrev + 1):
605 for r in self.revs(start=startrev + 1):
603 for p in parentrevs(r):
606 for p in parentrevs(r):
604 if p in reachable:
607 if p in reachable:
605 if r not in stoprevs:
608 if r not in stoprevs:
606 reachable.add(r)
609 reachable.add(r)
607 heads.add(r)
610 heads.add(r)
608 if p in heads and p not in stoprevs:
611 if p in heads and p not in stoprevs:
609 heads.remove(p)
612 heads.remove(p)
610
613
611 return [self.node(r) for r in heads]
614 return [self.node(r) for r in heads]
612
615
613 def children(self, node):
616 def children(self, node):
614 validatenode(node)
617 validatenode(node)
615
618
616 # This is a copy of revlog.children().
619 # This is a copy of revlog.children().
617 c = []
620 c = []
618 p = self.rev(node)
621 p = self.rev(node)
619 for r in self.revs(start=p + 1):
622 for r in self.revs(start=p + 1):
620 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
623 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
621 if prevs:
624 if prevs:
622 for pr in prevs:
625 for pr in prevs:
623 if pr == p:
626 if pr == p:
624 c.append(self.node(r))
627 c.append(self.node(r))
625 elif p == nullrev:
628 elif p == nullrev:
626 c.append(self.node(r))
629 c.append(self.node(r))
627 return c
630 return c
628
631
629 def getstrippoint(self, minlink):
632 def getstrippoint(self, minlink):
630 return storageutil.resolvestripinfo(
633 return storageutil.resolvestripinfo(
631 minlink,
634 minlink,
632 len(self) - 1,
635 len(self) - 1,
633 self._headrevs(),
636 self._headrevs(),
634 self.linkrev,
637 self.linkrev,
635 self.parentrevs,
638 self.parentrevs,
636 )
639 )
637
640
638 def strip(self, minlink, transaction):
641 def strip(self, minlink, transaction):
639 if not len(self):
642 if not len(self):
640 return
643 return
641
644
642 rev, _ignored = self.getstrippoint(minlink)
645 rev, _ignored = self.getstrippoint(minlink)
643 if rev == len(self):
646 if rev == len(self):
644 return
647 return
645
648
646 # Purge index data starting at the requested revision.
649 # Purge index data starting at the requested revision.
647 self._indexdata[rev:] = []
650 self._indexdata[rev:] = []
648 self._reflectindexupdate()
651 self._reflectindexupdate()
649
652
650
653
651 def issimplestorefile(f, kind, st):
654 def issimplestorefile(f, kind, st):
652 if kind != stat.S_IFREG:
655 if kind != stat.S_IFREG:
653 return False
656 return False
654
657
655 if store.isrevlog(f, kind, st):
658 if store.isrevlog(f, kind, st):
656 return False
659 return False
657
660
658 # Ignore transaction undo files.
661 # Ignore transaction undo files.
659 if f.startswith('undo.'):
662 if f.startswith('undo.'):
660 return False
663 return False
661
664
662 # Otherwise assume it belongs to the simple store.
665 # Otherwise assume it belongs to the simple store.
663 return True
666 return True
664
667
665
668
666 class simplestore(store.encodedstore):
669 class simplestore(store.encodedstore):
667 def data_entries(self, undecodable=None):
670 def data_entries(self, undecodable=None):
668 for x in super(simplestore, self).data_entries():
671 for x in super(simplestore, self).data_entries():
669 yield x
672 yield x
670
673
671 # Supplement with non-revlog files.
674 # Supplement with non-revlog files.
672 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
675 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
673
676
674 for f1, size in extrafiles:
677 for f1, size in extrafiles:
675 try:
678 try:
676 f2 = store.decodefilename(f1)
679 f2 = store.decodefilename(f1)
677 except KeyError:
680 except KeyError:
678 if undecodable is None:
681 if undecodable is None:
679 raise error.StorageError(b'undecodable revlog name %s' % f1)
682 raise error.StorageError(b'undecodable revlog name %s' % f1)
680 else:
683 else:
681 undecodable.append(f1)
684 undecodable.append(f1)
682 continue
685 continue
683
686
684 yield f2, size
687 yield f2, size
685
688
686
689
687 def reposetup(ui, repo):
690 def reposetup(ui, repo):
688 if not repo.local():
691 if not repo.local():
689 return
692 return
690
693
691 if isinstance(repo, bundlerepo.bundlerepository):
694 if isinstance(repo, bundlerepo.bundlerepository):
692 raise error.Abort(_('cannot use simple store with bundlerepo'))
695 raise error.Abort(_('cannot use simple store with bundlerepo'))
693
696
694 class simplestorerepo(repo.__class__):
697 class simplestorerepo(repo.__class__):
695 def file(self, f):
698 def file(self, f):
696 return filestorage(repo, self.svfs, f)
699 return filestorage(repo, self.svfs, f)
697
700
698 repo.__class__ = simplestorerepo
701 repo.__class__ = simplestorerepo
699
702
700
703
701 def featuresetup(ui, supported):
704 def featuresetup(ui, supported):
702 supported.add(REQUIREMENT)
705 supported.add(REQUIREMENT)
703
706
704
707
705 def newreporequirements(orig, ui, createopts):
708 def newreporequirements(orig, ui, createopts):
706 """Modifies default requirements for new repos to use the simple store."""
709 """Modifies default requirements for new repos to use the simple store."""
707 requirements = orig(ui, createopts)
710 requirements = orig(ui, createopts)
708
711
709 # These requirements are only used to affect creation of the store
712 # These requirements are only used to affect creation of the store
710 # object. We have our own store. So we can remove them.
713 # object. We have our own store. So we can remove them.
711 # TODO do this once we feel like taking the test hit.
714 # TODO do this once we feel like taking the test hit.
712 # if 'fncache' in requirements:
715 # if 'fncache' in requirements:
713 # requirements.remove('fncache')
716 # requirements.remove('fncache')
714 # if 'dotencode' in requirements:
717 # if 'dotencode' in requirements:
715 # requirements.remove('dotencode')
718 # requirements.remove('dotencode')
716
719
717 requirements.add(REQUIREMENT)
720 requirements.add(REQUIREMENT)
718
721
719 return requirements
722 return requirements
720
723
721
724
722 def makestore(orig, requirements, path, vfstype):
725 def makestore(orig, requirements, path, vfstype):
723 if REQUIREMENT not in requirements:
726 if REQUIREMENT not in requirements:
724 return orig(requirements, path, vfstype)
727 return orig(requirements, path, vfstype)
725
728
726 return simplestore(path, vfstype)
729 return simplestore(path, vfstype)
727
730
728
731
729 def verifierinit(orig, self, *args, **kwargs):
732 def verifierinit(orig, self, *args, **kwargs):
730 orig(self, *args, **kwargs)
733 orig(self, *args, **kwargs)
731
734
732 # We don't care that files in the store don't align with what is
735 # We don't care that files in the store don't align with what is
733 # advertised. So suppress these warnings.
736 # advertised. So suppress these warnings.
734 self.warnorphanstorefiles = False
737 self.warnorphanstorefiles = False
735
738
736
739
737 def extsetup(ui):
740 def extsetup(ui):
738 localrepo.featuresetupfuncs.add(featuresetup)
741 localrepo.featuresetupfuncs.add(featuresetup)
739
742
740 extensions.wrapfunction(
743 extensions.wrapfunction(
741 localrepo, 'newreporequirements', newreporequirements
744 localrepo, 'newreporequirements', newreporequirements
742 )
745 )
743 extensions.wrapfunction(localrepo, 'makestore', makestore)
746 extensions.wrapfunction(localrepo, 'makestore', makestore)
744 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
747 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
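
In simplestorerepo.py, `simplefilestoreproblem` now subclasses the `repository.iverifyproblem` Protocol directly, with explicitly typed `Optional[bytes]` attrs fields and a new `typing` import, instead of using the `interfaceutil.implementer` decorator. A hedged sketch of the same pattern for a hypothetical out-of-tree storage backend follows — `mystoreproblem` is an illustrative name, not part of this change:

from typing import Optional

from mercurial.thirdparty import attr
from mercurial.interfaces import repository


@attr.s(frozen=True)
class mystoreproblem(repository.iverifyproblem):
    # Same shape as simplefilestoreproblem: optional byte-string fields
    # describing a single verification problem found by verifyintegrity().
    warning = attr.ib(default=None, type=Optional[bytes])
    error = attr.ib(default=None, type=Optional[bytes])
    node = attr.ib(default=None, type=Optional[bytes])
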