# sqlitestore.py - Storage backend that uses SQLite
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""store repository data in SQLite (EXPERIMENTAL)

The sqlitestore extension enables the storage of repository data in SQLite.

This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
GUARANTEES. This means that repositories created with this extension may
only be usable with the exact version of this extension/Mercurial that was
used. The extension attempts to enforce this in order to prevent repository
corruption.

In addition, several features are not yet supported or have known bugs:

* Only some data is stored in SQLite. Changeset, manifest, and other repository
  data is not yet stored in SQLite.
* Transactions are not robust. If the process is aborted at the right time
  during transaction close/rollback, the repository could be in an inconsistent
  state. This problem will diminish once all repository data is tracked by
  SQLite.
* Bundle repositories do not work (the ability to use e.g.
  `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
  existing repository).
* Various other features don't work.

This extension should work for basic clone/pull, update, and commit workflows.
Some history rewriting operations may fail due to lack of support for bundle
repositories.

To use, activate the extension and set the ``storage.new-repo-backend`` config
option to ``sqlite`` to enable new repositories to use SQLite for storage.
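
For example, in an hgrc::

  [extensions]
  sqlitestore =

  [storage]
  new-repo-backend = sqlite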
37 """
37 """
38
38
39 # To run the test suite with repos using SQLite by default, execute the
39 # To run the test suite with repos using SQLite by default, execute the
40 # following:
40 # following:
41 #
41 #
42 # HGREPOFEATURES="sqlitestore" run-tests.py \
42 # HGREPOFEATURES="sqlitestore" run-tests.py \
43 # --extra-config-opt extensions.sqlitestore= \
43 # --extra-config-opt extensions.sqlitestore= \
44 # --extra-config-opt storage.new-repo-backend=sqlite
44 # --extra-config-opt storage.new-repo-backend=sqlite
45
45
46 from __future__ import absolute_import
46 from __future__ import absolute_import
47
47
48 import sqlite3
48 import sqlite3
49 import struct
49 import struct
50 import threading
50 import threading
51 import zlib
51 import zlib
52
52
53 from mercurial.i18n import _
53 from mercurial.i18n import _
54 from mercurial.node import (
54 from mercurial.node import (
55 nullid,
55 nullid,
56 nullrev,
56 nullrev,
57 short,
57 short,
58 )
58 )
59 from mercurial.thirdparty import attr
59 from mercurial.thirdparty import attr
60 from mercurial import (
60 from mercurial import (
61 ancestor,
61 ancestor,
62 dagop,
62 dagop,
63 encoding,
63 encoding,
64 error,
64 error,
65 extensions,
65 extensions,
66 localrepo,
66 localrepo,
67 mdiff,
67 mdiff,
68 pycompat,
68 pycompat,
69 registrar,
69 registrar,
70 requirements,
70 requirements,
71 util,
71 util,
72 verify,
72 verify,
73 )
73 )
74 from mercurial.interfaces import (
74 from mercurial.interfaces import (
75 repository,
75 repository,
76 util as interfaceutil,
76 util as interfaceutil,
77 )
77 )
78 from mercurial.utils import (
78 from mercurial.utils import (
79 hashutil,
79 hashutil,
80 storageutil,
80 storageutil,
81 )
81 )
82
82
83 try:
83 try:
84 from mercurial import zstd
84 from mercurial import zstd
85
85
86 zstd.__version__
86 zstd.__version__
87 except ImportError:
87 except ImportError:
88 zstd = None
88 zstd = None
89
89
90 configtable = {}
90 configtable = {}
91 configitem = registrar.configitem(configtable)
91 configitem = registrar.configitem(configtable)
92
92
93 # experimental config: storage.sqlite.compression
93 # experimental config: storage.sqlite.compression
94 configitem(
94 configitem(
95 b'storage',
95 b'storage',
96 b'sqlite.compression',
96 b'sqlite.compression',
97 default=b'zstd' if zstd else b'zlib',
97 default=b'zstd' if zstd else b'zlib',
98 experimental=True,
98 experimental=True,
99 )
99 )
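
# An illustrative hgrc snippet selecting the compression engine (the
# REQUIREMENT_* constants below suggest the choice is recorded per
# repository when it is created):
#
#   [storage]
#   sqlite.compression = zlib
#
# Supported values are ``zstd`` (when the bundled zstd module is
# available), ``zlib``, and ``none``.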

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'

REQUIREMENT = b'exp-sqlite-001'
REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'

CURRENT_SCHEMA_VERSION = 1

COMPRESSION_NONE = 1
COMPRESSION_ZSTD = 2
COMPRESSION_ZLIB = 3

FLAG_CENSORED = 1
FLAG_MISSING_P1 = 2
FLAG_MISSING_P2 = 4

CREATE_SCHEMA = [
    # Deltas are stored as content-indexed blobs.
    # compression column holds COMPRESSION_* constant for how the
    # delta is encoded.
    'CREATE TABLE delta ('
    ' id INTEGER PRIMARY KEY, '
    ' compression INTEGER NOT NULL, '
    ' hash BLOB UNIQUE ON CONFLICT ABORT, '
    ' delta BLOB NOT NULL '
    ')',
    # Tracked paths are denormalized to integers to avoid redundant
    # storage of the path name.
    'CREATE TABLE filepath ('
    ' id INTEGER PRIMARY KEY, '
    ' path BLOB NOT NULL '
    ')',
    'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
    # We have a single table for all file revision data.
    # Each file revision is uniquely described by a (path, rev) and
    # (path, node).
    #
    # Revision data is stored as a pointer to the delta producing this
    # revision and the file revision whose delta should be applied before
    # that one. One can reconstruct the delta chain by recursively following
    # the delta base revision pointers until one encounters NULL.
    #
    # flags column holds bitwise integer flags controlling storage options.
    # These flags are defined by the FLAG_* constants.
    'CREATE TABLE fileindex ('
    ' id INTEGER PRIMARY KEY, '
    ' pathid INTEGER REFERENCES filepath(id), '
    ' revnum INTEGER NOT NULL, '
    ' p1rev INTEGER NOT NULL, '
    ' p2rev INTEGER NOT NULL, '
    ' linkrev INTEGER NOT NULL, '
    ' flags INTEGER NOT NULL, '
    ' deltaid INTEGER REFERENCES delta(id), '
    ' deltabaseid INTEGER REFERENCES fileindex(id), '
    ' node BLOB NOT NULL '
    ')',
    'CREATE UNIQUE INDEX fileindex_pathrevnum '
    ' ON fileindex (pathid, revnum)',
    'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
    # Provide a view over all file data for convenience.
    'CREATE VIEW filedata AS '
    'SELECT '
    ' fileindex.id AS id, '
    ' filepath.id AS pathid, '
    ' filepath.path AS path, '
    ' fileindex.revnum AS revnum, '
    ' fileindex.node AS node, '
    ' fileindex.p1rev AS p1rev, '
    ' fileindex.p2rev AS p2rev, '
    ' fileindex.linkrev AS linkrev, '
    ' fileindex.flags AS flags, '
    ' fileindex.deltaid AS deltaid, '
    ' fileindex.deltabaseid AS deltabaseid '
    'FROM filepath, fileindex '
    'WHERE fileindex.pathid=filepath.id',
    'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
]
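
# A minimal sketch of the reconstruction this schema implies (the lookup
# helpers named here are hypothetical; resolvedeltachain() below performs
# the same walk in SQL with a recursive CTE):
#
#   deltas = []
#   row = find_fileindex_row(pathid, node)          # hypothetical helper
#   while row is not None:
#       deltas.append(load_delta(row.deltaid))      # hypothetical helper
#       row = find_fileindex_row_by_id(row.deltabaseid)
#   basetext = deltas.pop()  # chain ends at NULL: last delta is a fulltext
#   deltas.reverse()
#   fulltext = mdiff.patches(basetext, deltas)
#
# The filedata view can also be queried directly when debugging, e.g.:
#
#   SELECT revnum, hex(node) FROM filedata WHERE path=? ORDER BY revnum ASC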


def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
    """Resolve a delta chain for a file node."""

    # TODO the "not in ({stops})" here is possibly slowing down the query
    # because it needs to perform the lookup on every recursive invocation.
    # This could possibly be faster if we created a temporary query with
    # baseid "poisoned" to null and limited the recursive filter to
    # "is not null".
    res = db.execute(
        'WITH RECURSIVE '
        ' deltachain(deltaid, baseid) AS ('
        ' SELECT deltaid, deltabaseid FROM fileindex '
        ' WHERE pathid=? AND node=? '
        ' UNION ALL '
        ' SELECT fileindex.deltaid, deltabaseid '
        ' FROM fileindex, deltachain '
        ' WHERE '
        ' fileindex.id=deltachain.baseid '
        ' AND deltachain.baseid IS NOT NULL '
        ' AND fileindex.id NOT IN ({stops}) '
        ' ) '
        'SELECT deltachain.baseid, compression, delta '
        'FROM deltachain, delta '
        'WHERE delta.id=deltachain.deltaid'.format(
            stops=','.join(['?'] * len(stoprids))
        ),
        tuple([pathid, node] + list(stoprids.keys())),
    )

    deltas = []
    lastdeltabaseid = None

    for deltabaseid, compression, delta in res:
        lastdeltabaseid = deltabaseid

        if compression == COMPRESSION_ZSTD:
            delta = zstddctx.decompress(delta)
        elif compression == COMPRESSION_NONE:
            delta = delta
        elif compression == COMPRESSION_ZLIB:
            delta = zlib.decompress(delta)
        else:
            raise SQLiteStoreError(
                b'unhandled compression type: %d' % compression
            )

        deltas.append(delta)

    if lastdeltabaseid in stoprids:
        basetext = revisioncache[stoprids[lastdeltabaseid]]
    else:
        basetext = deltas.pop()

    deltas.reverse()
    fulltext = mdiff.patches(basetext, deltas)

    # SQLite returns buffer instances for blob columns on Python 2. This
    # type can propagate through the delta application layer. Because
    # downstream callers assume revisions are bytes, cast as needed.
    if not isinstance(fulltext, bytes):
        fulltext = bytes(fulltext)

    return fulltext


def insertdelta(db, compression, hash, delta):
    try:
        return db.execute(
            'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
            (compression, hash, delta),
        ).lastrowid
    except sqlite3.IntegrityError:
        return db.execute(
            'SELECT id FROM delta WHERE hash=?', (hash,)
        ).fetchone()[0]


class SQLiteStoreError(error.StorageError):
    pass


@attr.s
class revisionentry(object):
    rid = attr.ib()
    rev = attr.ib()
    node = attr.ib()
    p1rev = attr.ib()
    p2rev = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    linkrev = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class sqliterevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class sqliteproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


@interfaceutil.implementer(repository.ifilestorage)
class sqlitefilestore(object):
    """Implements storage for an individual tracked path."""

    def __init__(self, db, path, compression):
        self._db = db
        self._path = path

        self._pathid = None

        # revnum -> node
        self._revtonode = {}
        # node -> revnum
        self._nodetorev = {}
        # node -> data structure
        self._revisions = {}

        self._revisioncache = util.lrucachedict(10)

        self._compengine = compression

        if compression == b'zstd':
            self._cctx = zstd.ZstdCompressor(level=3)
            self._dctx = zstd.ZstdDecompressor()
        else:
            self._cctx = None
            self._dctx = None

        self._refreshindex()

    def _refreshindex(self):
        self._revtonode = {}
        self._nodetorev = {}
        self._revisions = {}

        res = list(
            self._db.execute(
                'SELECT id FROM filepath WHERE path=?', (self._path,)
            )
        )

        if not res:
            self._pathid = None
            return

        self._pathid = res[0][0]

        res = self._db.execute(
            'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
            'FROM fileindex '
            'WHERE pathid=? '
            'ORDER BY revnum ASC',
            (self._pathid,),
        )

        for i, row in enumerate(res):
            rid, rev, node, p1rev, p2rev, linkrev, flags = row

            if i != rev:
                raise SQLiteStoreError(
                    _(b'sqlite database has inconsistent revision numbers')
                )

            if p1rev == nullrev:
                p1node = nullid
            else:
                p1node = self._revtonode[p1rev]

            if p2rev == nullrev:
                p2node = nullid
            else:
                p2node = self._revtonode[p2rev]

            entry = revisionentry(
                rid=rid,
                rev=rev,
                node=node,
                p1rev=p1rev,
                p2rev=p2rev,
                p1node=p1node,
                p2node=p2node,
                linkrev=linkrev,
                flags=flags,
            )

            self._revtonode[rev] = node
            self._nodetorev[node] = rev
            self._revisions[node] = entry

    # Start of ifileindex interface.

    def __len__(self):
        return len(self._revisions)

    def __iter__(self):
        return iter(pycompat.xrange(len(self._revisions)))

    def hasnode(self, node):
        if node == nullid:
            return False

        return node in self._nodetorev

    def revs(self, start=0, stop=None):
        return storageutil.iterrevs(
            len(self._revisions), start=start, stop=stop
        )

    def parents(self, node):
        if node == nullid:
            return nullid, nullid

        if node not in self._revisions:
            raise error.LookupError(node, self._path, _(b'no node'))

        entry = self._revisions[node]
        return entry.p1node, entry.p2node

    def parentrevs(self, rev):
        if rev == nullrev:
            return nullrev, nullrev

        if rev not in self._revtonode:
            raise IndexError(rev)

        entry = self._revisions[self._revtonode[rev]]
        return entry.p1rev, entry.p2rev

    def rev(self, node):
        if node == nullid:
            return nullrev

        if node not in self._nodetorev:
            raise error.LookupError(node, self._path, _(b'no node'))

        return self._nodetorev[node]

    def node(self, rev):
        if rev == nullrev:
            return nullid

        if rev not in self._revtonode:
            raise IndexError(rev)

        return self._revtonode[rev]

    def lookup(self, node):
        return storageutil.fileidlookup(self, node, self._path)

    def linkrev(self, rev):
        if rev == nullrev:
            return nullrev

        if rev not in self._revtonode:
            raise IndexError(rev)

        entry = self._revisions[self._revtonode[rev]]
        return entry.linkrev

    def iscensored(self, rev):
        if rev == nullrev:
            return False

        if rev not in self._revtonode:
            raise IndexError(rev)

        return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED

    def commonancestorsheads(self, node1, node2):
        rev1 = self.rev(node1)
        rev2 = self.rev(node2)

        ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
        return pycompat.maplist(self.node, ancestors)

    def descendants(self, revs):
        # TODO we could implement this using a recursive SQL query, which
        # might be faster.
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def heads(self, start=None, stop=None):
        if start is None and stop is None:
            if not len(self):
                return [nullid]

        startrev = self.rev(start) if start is not None else nullrev
        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        rev = self.rev(node)

        res = self._db.execute(
            'SELECT'
            ' node '
            ' FROM filedata '
            ' WHERE path=? AND (p1rev=? OR p2rev=?) '
            ' ORDER BY revnum ASC',
            (self._path, rev, rev),
        )

        return [row[0] for row in res]

    # End of ifileindex interface.

    # Start of ifiledata interface.

    def size(self, rev):
        if rev == nullrev:
            return 0

        if rev not in self._revtonode:
            raise IndexError(rev)

        node = self._revtonode[rev]

        if self.renamed(node):
            return len(self.read(node))

        return len(self.revision(node))

    def revision(self, node, raw=False, _verifyhash=True):
        if node in (nullid, nullrev):
            return b''

        if isinstance(node, int):
            node = self.node(node)

        if node not in self._nodetorev:
            raise error.LookupError(node, self._path, _(b'no node'))

        if node in self._revisioncache:
            return self._revisioncache[node]

        # Because we have a fulltext revision cache, we are able to
        # short-circuit delta chain traversal and decompression as soon as
        # we encounter a revision in the cache.
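        #
        # stoprids maps the fileindex row id (rid) of each cached revision
        # to its node; resolvedeltachain() stops walking the chain when it
        # reaches one of those rows, and the cached fulltext is spliced in
        # as the base text instead of being reconstructed.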

        stoprids = {self._revisions[n].rid: n for n in self._revisioncache}

        if not stoprids:
            stoprids[-1] = None

        fulltext = resolvedeltachain(
            self._db,
            self._pathid,
            node,
            self._revisioncache,
            stoprids,
            zstddctx=self._dctx,
        )

        # Don't verify hashes if parent nodes were rewritten, as the hash
        # wouldn't verify.
        if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
            _verifyhash = False

        if _verifyhash:
            self._checkhash(fulltext, node)
            self._revisioncache[node] = fulltext

        return fulltext

    def rawdata(self, *args, **kwargs):
        return self.revision(*args, **kwargs)

    def read(self, node):
        return storageutil.filtermetadata(self.revision(node))

    def renamed(self, node):
        return storageutil.filerevisioncopied(self, node)

    def cmp(self, node, fulltext):
        return not storageutil.filedataequivalent(self, node, fulltext)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        nodes = [n for n in nodes if n != nullid]

        if not nodes:
            return

        # TODO perform in a single query.
        res = self._db.execute(
            'SELECT revnum, deltaid FROM fileindex '
            'WHERE pathid=? '
            ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
            tuple([self._pathid] + nodes),
        )

        deltabases = {}

        for rev, deltaid in res:
            res = self._db.execute(
                'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
                (self._pathid, deltaid),
            )
            deltabases[rev] = res.fetchone()[0]

        # TODO define revdifffn so we can use delta from storage.
        for delta in storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            sqliterevisiondelta,
            deltaparentfn=deltabases.__getitem__,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
        ):
            yield delta

    # End of ifiledata interface.

    # Start of ifilemutation interface.

    def add(self, filedata, meta, transaction, linkrev, p1, p2):
        if meta or filedata.startswith(b'\x01\n'):
            filedata = storageutil.packmeta(meta, filedata)

        rev = self.addrevision(filedata, transaction, linkrev, p1, p2)
        return self.node(rev)

    def addrevision(
        self,
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=0,
        cachedelta=None,
    ):
        if flags:
            raise SQLiteStoreError(_(b'flags not supported on revisions'))

        validatehash = node is not None
        node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)

        if validatehash:
            self._checkhash(revisiondata, node, p1, p2)

        rev = self._nodetorev.get(node)
        if rev is not None:
            return rev

        rev = self._addrawrevision(
            node, revisiondata, transaction, linkrev, p1, p2
        )

        self._revisioncache[node] = revisiondata
        return rev

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
    ):
        empty = True

        for (
            node,
            p1,
            p2,
            linknode,
            deltabase,
            delta,
            wireflags,
            sidedata,
        ) in deltas:
            storeflags = 0

            if wireflags & repository.REVISION_FLAG_CENSORED:
                storeflags |= FLAG_CENSORED

            if wireflags & ~repository.REVISION_FLAG_CENSORED:
                raise SQLiteStoreError(b'unhandled revision flag')

            if maybemissingparents:
                if p1 != nullid and not self.hasnode(p1):
                    p1 = nullid
                    storeflags |= FLAG_MISSING_P1

                if p2 != nullid and not self.hasnode(p2):
                    p2 = nullid
                    storeflags |= FLAG_MISSING_P2

            baserev = self.rev(deltabase)

            # If base is censored, delta must be full replacement in a single
            # patch operation.
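            #
            # A full replacement delta is a single patch hunk whose header
            # (start, end, and new length packed as three big-endian int32s,
            # hence b'>lll') spans the entire base text and is followed by
            # the replacement fulltext.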
            if baserev != nullrev and self.iscensored(baserev):
                hlen = struct.calcsize(b'>lll')
                oldlen = len(self.rawdata(deltabase, _verifyhash=False))
                newlen = len(delta) - hlen

                if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                    raise error.CensoredBaseError(self._path, deltabase)

            if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
                delta, baserev, lambda x: len(self.rawdata(x))
            ):
                storeflags |= FLAG_CENSORED

            linkrev = linkmapper(linknode)

            if node in self._revisions:
                # Possibly reset parents to make them proper.
                entry = self._revisions[node]

                if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
                    entry.p1node = p1
                    entry.p1rev = self._nodetorev[p1]
                    entry.flags &= ~FLAG_MISSING_P1

                    self._db.execute(
                        'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
                        (self._nodetorev[p1], entry.flags, entry.rid),
                    )

                if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
                    entry.p2node = p2
                    entry.p2rev = self._nodetorev[p2]
                    entry.flags &= ~FLAG_MISSING_P2

                    self._db.execute(
                        'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
                        (self._nodetorev[p2], entry.flags, entry.rid),
                    )

            if duplicaterevisioncb:
                duplicaterevisioncb(self, self.rev(node))
                empty = False
                continue

            if deltabase == nullid:
                text = mdiff.patch(b'', delta)
                storedelta = None
            else:
                text = None
                storedelta = (deltabase, delta)

            rev = self._addrawrevision(
                node,
                text,
                transaction,
                linkrev,
                p1,
                p2,
                storedelta=storedelta,
                flags=storeflags,
            )

            if addrevisioncb:
                addrevisioncb(self, rev)
            empty = False

        return not empty

    def censorrevision(self, tr, censornode, tombstone=b''):
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        # This restriction is cargo culted from revlogs and makes no sense for
        # SQLite, since columns can be resized at will.
        if len(tombstone) > len(self.rawdata(censornode)):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # We need to replace the censored revision's data with the tombstone.
        # But replacing that data will have implications for delta chains that
        # reference it.
        #
        # While "better," more complex strategies are possible, we do something
        # simple: we find delta chain children of the censored revision and we
        # replace those incremental deltas with fulltexts of their corresponding
        # revision. Then we delete the now-unreferenced delta and original
        # revision and insert a replacement.

        # Find the delta to be censored.
        censoreddeltaid = self._db.execute(
            'SELECT deltaid FROM fileindex WHERE id=?',
            (self._revisions[censornode].rid,),
        ).fetchone()[0]

        # Find all its delta chain children.
        # TODO once we support storing deltas for !files, we'll need to look
        # for those delta chains too.
        rows = list(
            self._db.execute(
                'SELECT id, pathid, node FROM fileindex '
                'WHERE deltabaseid=? OR deltaid=?',
                (censoreddeltaid, censoreddeltaid),
            )
        )

        for row in rows:
            rid, pathid, node = row

            fulltext = resolvedeltachain(
                self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
            )

            deltahash = hashutil.sha1(fulltext).digest()

            if self._compengine == b'zstd':
                deltablob = self._cctx.compress(fulltext)
                compression = COMPRESSION_ZSTD
            elif self._compengine == b'zlib':
                deltablob = zlib.compress(fulltext)
                compression = COMPRESSION_ZLIB
            elif self._compengine == b'none':
                deltablob = fulltext
                compression = COMPRESSION_NONE
            else:
                raise error.ProgrammingError(
                    b'unhandled compression engine: %s' % self._compengine
                )

            if len(deltablob) >= len(fulltext):
                deltablob = fulltext
                compression = COMPRESSION_NONE

            deltaid = insertdelta(self._db, compression, deltahash, deltablob)

            self._db.execute(
                'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
                'WHERE id=?',
                (deltaid, rid),
            )

        # Now create the tombstone delta and replace the delta on the censored
        # node.
        deltahash = hashutil.sha1(tombstone).digest()
        tombstonedeltaid = insertdelta(
            self._db, COMPRESSION_NONE, deltahash, tombstone
        )

        flags = self._revisions[censornode].flags
        flags |= FLAG_CENSORED

        self._db.execute(
            'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
            'WHERE pathid=? AND node=?',
            (flags, tombstonedeltaid, self._pathid, censornode),
        )

        self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))

        self._refreshindex()
        self._revisioncache.clear()

    def getstrippoint(self, minlink):
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            [self.rev(n) for n in self.heads()],
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        if not len(self):
            return

        rev, _ignored = self.getstrippoint(minlink)

        if rev == len(self):
            return

        for rev in self.revs(rev):
            self._db.execute(
                'DELETE FROM fileindex WHERE pathid=? AND node=?',
                (self._pathid, self.node(rev)),
            )

        # TODO how should we garbage collect data in delta table?

        self._refreshindex()

    # End of ifilemutation interface.

    # Start of ifilestorage interface.

    def files(self):
        return []

    def sidedata(self, nodeorrev, _df=None):
        # Not supported for now
        return {}

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = []

        if sharedfiles:
            # TODO list sqlite file(s) here.
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(
                len(self.revision(node)) for node in self._nodetorev
            )

        if storedsize:
            # TODO implement this?
            d[b'storedsize'] = None

        return d

    def verifyintegrity(self, state):
        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            try:
                self.revision(node)
            except Exception as e:
                yield sqliteproblem(
                    error=_(b'unpacking %s: %s') % (short(node), e), node=node
                )

                state[b'skipread'].add(node)

    # End of ifilestorage interface.

    def _checkhash(self, fulltext, node, p1=None, p2=None):
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)

        if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
            return

        try:
            del self._revisioncache[node]
        except KeyError:
            pass

        if storageutil.iscensoredtext(fulltext):
            raise error.CensoredNodeError(self._path, node, fulltext)

        raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)

    def _addrawrevision(
        self,
        node,
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        storedelta=None,
        flags=0,
    ):
        if self._pathid is None:
            res = self._db.execute(
                'INSERT INTO filepath (path) VALUES (?)', (self._path,)
            )
            self._pathid = res.lastrowid

        # For simplicity, always store a delta against p1.
        # TODO we need a lot more logic here to make behavior reasonable.

        if storedelta:
            deltabase, delta = storedelta

            if isinstance(deltabase, int):
                deltabase = self.node(deltabase)

        else:
            assert revisiondata is not None
            deltabase = p1

            if deltabase == nullid:
                delta = revisiondata
            else:
                delta = mdiff.textdiff(
                    self.revision(self.rev(deltabase)), revisiondata
                )

        # File index stores a pointer to its delta and the parent delta.
        # The parent delta is stored via a pointer to the fileindex PK.
        if deltabase == nullid:
            baseid = None
        else:
            baseid = self._revisions[deltabase].rid

        # Deltas are stored with a hash of their content. This allows
        # us to de-duplicate. The hash column is UNIQUE, so it is faster
        # to just insert and let insertdelta() fall back to looking up
        # the existing row on conflict than to check for an existing
        # delta first.
1029 deltahash = hashutil.sha1(delta).digest()
1031 deltahash = hashutil.sha1(delta).digest()
1030
1032
1031 if self._compengine == b'zstd':
1033 if self._compengine == b'zstd':
1032 deltablob = self._cctx.compress(delta)
1034 deltablob = self._cctx.compress(delta)
1033 compression = COMPRESSION_ZSTD
1035 compression = COMPRESSION_ZSTD
1034 elif self._compengine == b'zlib':
1036 elif self._compengine == b'zlib':
1035 deltablob = zlib.compress(delta)
1037 deltablob = zlib.compress(delta)
1036 compression = COMPRESSION_ZLIB
1038 compression = COMPRESSION_ZLIB
1037 elif self._compengine == b'none':
1039 elif self._compengine == b'none':
1038 deltablob = delta
1040 deltablob = delta
1039 compression = COMPRESSION_NONE
1041 compression = COMPRESSION_NONE
1040 else:
1042 else:
1041 raise error.ProgrammingError(
1043 raise error.ProgrammingError(
1042 b'unhandled compression engine: %s' % self._compengine
1044 b'unhandled compression engine: %s' % self._compengine
1043 )
1045 )
1044
1046
1045 # Don't store compressed data if it isn't practical.
1047 # Don't store compressed data if it isn't practical.
1046 if len(deltablob) >= len(delta):
1048 if len(deltablob) >= len(delta):
1047 deltablob = delta
1049 deltablob = delta
1048 compression = COMPRESSION_NONE
1050 compression = COMPRESSION_NONE
1049
1051
1050 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1052 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1051
1053
1052 rev = len(self)
1054 rev = len(self)
1053
1055
1054 if p1 == nullid:
1056 if p1 == nullid:
1055 p1rev = nullrev
1057 p1rev = nullrev
1056 else:
1058 else:
1057 p1rev = self._nodetorev[p1]
1059 p1rev = self._nodetorev[p1]
1058
1060
1059 if p2 == nullid:
1061 if p2 == nullid:
1060 p2rev = nullrev
1062 p2rev = nullrev
1061 else:
1063 else:
1062 p2rev = self._nodetorev[p2]
1064 p2rev = self._nodetorev[p2]
1063
1065
1064 rid = self._db.execute(
1066 rid = self._db.execute(
1065 'INSERT INTO fileindex ('
1067 'INSERT INTO fileindex ('
1066 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1068 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1067 ' deltaid, deltabaseid) '
1069 ' deltaid, deltabaseid) '
1068 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1070 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1069 (
1071 (
1070 self._pathid,
1072 self._pathid,
1071 rev,
1073 rev,
1072 node,
1074 node,
1073 p1rev,
1075 p1rev,
1074 p2rev,
1076 p2rev,
1075 linkrev,
1077 linkrev,
1076 flags,
1078 flags,
1077 deltaid,
1079 deltaid,
1078 baseid,
1080 baseid,
1079 ),
1081 ),
1080 ).lastrowid
1082 ).lastrowid
1081
1083
1082 entry = revisionentry(
1084 entry = revisionentry(
1083 rid=rid,
1085 rid=rid,
1084 rev=rev,
1086 rev=rev,
1085 node=node,
1087 node=node,
1086 p1rev=p1rev,
1088 p1rev=p1rev,
1087 p2rev=p2rev,
1089 p2rev=p2rev,
1088 p1node=p1,
1090 p1node=p1,
1089 p2node=p2,
1091 p2node=p2,
1090 linkrev=linkrev,
1092 linkrev=linkrev,
1091 flags=flags,
1093 flags=flags,
1092 )
1094 )
1093
1095
1094 self._nodetorev[node] = rev
1096 self._nodetorev[node] = rev
1095 self._revtonode[rev] = node
1097 self._revtonode[rev] = node
1096 self._revisions[node] = entry
1098 self._revisions[node] = entry
1097
1099
1098 return rev
1100 return rev
1099
1101
1100
1102
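
def _example_compression_fallback():
    # Editor's sketch (not part of the extension), illustrating the
    # "don't store compressed data if it isn't practical" check in the
    # method above: zlib output for tiny or incompressible input can be
    # larger than the input itself, in which case the raw delta is kept
    # and the compression marker is reset to "none".
    import zlib

    compressible = b'\x00' * 1024
    assert len(zlib.compress(compressible)) < len(compressible)

    tiny = b'x'
    blob = zlib.compress(tiny)  # zlib framing overhead exceeds 1 byte
    if len(blob) >= len(tiny):
        blob = tiny  # fall back to storing the raw bytes
    assert blob == tiny
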

class sqliterepository(localrepo.localrepository):
    def cancopy(self):
        return False

    def transaction(self, *args, **kwargs):
        current = self.currenttransaction()

        tr = super(sqliterepository, self).transaction(*args, **kwargs)

        if current:
            return tr

        self._dbconn.execute('BEGIN TRANSACTION')

        def committransaction(_):
            self._dbconn.commit()

        tr.addfinalize(b'sqlitestore', committransaction)

        return tr

    @property
    def _dbconn(self):
        # SQLite connections can only be used on the thread that created
        # them. In most cases, this "just works." However, hgweb uses
        # multiple threads.
        tid = threading.current_thread().ident

        if self._db:
            if self._db[0] == tid:
                return self._db[1]

        db = makedb(self.svfs.join(b'db.sqlite'))
        self._db = (tid, db)

        return db


def makedb(path):
    """Construct a database handle for a database at path."""

    db = sqlite3.connect(encoding.strfromlocal(path))
    db.text_factory = bytes

    res = db.execute('PRAGMA user_version').fetchone()[0]

    # New database.
    if res == 0:
        for statement in CREATE_SCHEMA:
            db.execute(statement)

        db.commit()

    elif res == CURRENT_SCHEMA_VERSION:
        pass

    else:
        raise error.Abort(_(b'sqlite database has unrecognized version'))

    db.execute('PRAGMA journal_mode=WAL')

    return db

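
def _example_user_version_check():
    # Editor's sketch (not part of the extension) of the versioning scheme
    # makedb() relies on: a fresh SQLite database reports ``PRAGMA
    # user_version`` as 0, and a backend can stamp its own schema version
    # so it can later recognize (or reject) the databases it opens.
    import sqlite3

    db = sqlite3.connect(':memory:')
    assert db.execute('PRAGMA user_version').fetchone()[0] == 0
    db.execute('PRAGMA user_version=1')  # stamp our schema version
    assert db.execute('PRAGMA user_version').fetchone()[0] == 1
    db.close()
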

def featuresetup(ui, supported):
    supported.add(REQUIREMENT)

    if zstd:
        supported.add(REQUIREMENT_ZSTD)

    supported.add(REQUIREMENT_ZLIB)
    supported.add(REQUIREMENT_NONE)
    supported.add(REQUIREMENT_SHALLOW_FILES)
    supported.add(requirements.NARROW_REQUIREMENT)


def newreporequirements(orig, ui, createopts):
    if createopts[b'backend'] != b'sqlite':
        return orig(ui, createopts)

    # This restriction can be lifted once we have more confidence.
    if b'sharedrepo' in createopts:
        raise error.Abort(
            _(b'shared repositories not supported with SQLite store')
        )

    # This filtering is out of an abundance of caution: we want to ensure
    # we honor creation options and we do that by annotating exactly the
    # creation options we recognize.
    known = {
        b'narrowfiles',
        b'backend',
        b'shallowfilestore',
    }

    unsupported = set(createopts) - known
    if unsupported:
        raise error.Abort(
            _(b'SQLite store does not support repo creation option: %s')
            % b', '.join(sorted(unsupported))
        )

    # Since we're a hybrid store that still relies on revlogs, we fall back
    # to using the revlogv1 backend's storage requirements, then add our
    # own requirement.
    createopts[b'backend'] = b'revlogv1'
    requirements = orig(ui, createopts)
    requirements.add(REQUIREMENT)

    compression = ui.config(b'storage', b'sqlite.compression')

    if compression == b'zstd' and not zstd:
        raise error.Abort(
            _(
                b'storage.sqlite.compression set to "zstd" but '
                b'zstandard compression not available to this '
                b'Mercurial install'
            )
        )

    if compression == b'zstd':
        requirements.add(REQUIREMENT_ZSTD)
    elif compression == b'zlib':
        requirements.add(REQUIREMENT_ZLIB)
    elif compression == b'none':
        requirements.add(REQUIREMENT_NONE)
    else:
        raise error.Abort(
            _(
                b'unknown compression engine defined in '
                b'storage.sqlite.compression: %s'
            )
            % compression
        )

    if createopts.get(b'shallowfilestore'):
        requirements.add(REQUIREMENT_SHALLOW_FILES)

    return requirements


@interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
class sqlitefilestorage(object):
    """Repository file storage backed by SQLite."""

    def file(self, path):
        # Use startswith() rather than indexing so the check also works on
        # Python 3, where indexing a bytes value yields an int.
        if path.startswith(b'/'):
            path = path[1:]

        if REQUIREMENT_ZSTD in self.requirements:
            compression = b'zstd'
        elif REQUIREMENT_ZLIB in self.requirements:
            compression = b'zlib'
        elif REQUIREMENT_NONE in self.requirements:
            compression = b'none'
        else:
            raise error.Abort(
                _(
                    b'unable to determine what compression engine '
                    b'to use for SQLite storage'
                )
            )

        return sqlitefilestore(self._dbconn, path, compression)


def makefilestorage(orig, requirements, features, **kwargs):
    """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
    if REQUIREMENT in requirements:
        if REQUIREMENT_SHALLOW_FILES in requirements:
            features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)

        return sqlitefilestorage
    else:
        return orig(requirements=requirements, features=features, **kwargs)


def makemain(orig, ui, requirements, **kwargs):
    if REQUIREMENT in requirements:
        if REQUIREMENT_ZSTD in requirements and not zstd:
            raise error.Abort(
                _(
                    b'repository uses zstandard compression, which '
                    b'is not available to this Mercurial install'
                )
            )

        return sqliterepository

    return orig(requirements=requirements, **kwargs)


def verifierinit(orig, self, *args, **kwargs):
    orig(self, *args, **kwargs)

    # We don't care that files in the store don't align with what is
    # advertised. So suppress these warnings.
    self.warnorphanstorefiles = False


def extsetup(ui):
    localrepo.featuresetupfuncs.add(featuresetup)
    extensions.wrapfunction(
        localrepo, b'newreporequirements', newreporequirements
    )
    extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage)
    extensions.wrapfunction(localrepo, b'makemain', makemain)
    extensions.wrapfunction(verify.verifier, b'__init__', verifierinit)

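
def _example_wrapfunction_pattern():
    # Editor's sketch of the convention used by extsetup() above:
    # extensions.wrapfunction(container, name, wrapper) swaps in a wrapper
    # that receives the original function as its first argument, so the
    # wrapper can delegate or override. A throwaway module object stands in
    # for ``localrepo``; the wrapfunction() defined here is a simplified
    # stand-in for illustration, not Mercurial's implementation.
    import types

    mod = types.ModuleType('fakemod')
    mod.greet = lambda name: 'hello %s' % name

    def wrapfunction(container, name, wrapper):
        orig = getattr(container, name)
        setattr(container, name, lambda *a, **kw: wrapper(orig, *a, **kw))

    def loudgreet(orig, name):
        # receives the original callable first, just like the wrappers above
        return orig(name).upper()

    wrapfunction(mod, 'greet', loudgreet)
    assert mod.greet('hg') == 'HELLO HG'
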

def reposetup(ui, repo):
    if isinstance(repo, sqliterepository):
        repo._db = None

    # TODO check for bundlerepository?

@@ -1,1794 +1,1861 @@

# changegroup.py - Mercurial changegroup manipulation functions
#
# Copyright 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os
import struct
import weakref

from .i18n import _
from .node import (
    hex,
    nullid,
    nullrev,
    short,
)
from .pycompat import open

from . import (
    error,
    match as matchmod,
    mdiff,
    phases,
    pycompat,
    requirements,
    scmutil,
    util,
)

from .interfaces import repository
from .revlogutils import sidedata as sidedatamod

_CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")

LFS_REQUIREMENT = b'lfs'

readexactly = util.readexactly


def getchunk(stream):
    """return the next chunk from stream as a string"""
    d = readexactly(stream, 4)
    l = struct.unpack(b">l", d)[0]
    if l <= 4:
        if l:
            raise error.Abort(_(b"invalid chunk length %d") % l)
        return b""
    return readexactly(stream, l - 4)


def chunkheader(length):
    """return a changegroup chunk header (string)"""
    return struct.pack(b">l", length + 4)


def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    return struct.pack(b">l", 0)

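
def _example_chunk_roundtrip():
    # Editor's sketch of the framing implemented by the three helpers
    # above: each chunk is prefixed with a 4-byte big-endian length that
    # counts the prefix itself, and a zero-length chunk (closechunk())
    # terminates a chunk sequence.
    import io

    payload = b'hello'
    stream = io.BytesIO(chunkheader(len(payload)) + payload + closechunk())
    assert getchunk(stream) == payload  # 9 bytes on the wire, 5 of data
    assert getchunk(stream) == b''  # the empty chunk marks the end
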

def _fileheader(path):
    """Obtain a changegroup chunk header for a named path."""
    return chunkheader(len(path)) + path


def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, b"wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, b"wb", 131072)
        else:
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            fh = os.fdopen(fd, "wb")
            cleanup = filename
        for c in chunks:
            fh.write(c)
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)


class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """

    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'01'
    _grouplistcount = 1  # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        if alg is None:
            alg = b'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % alg)
        if alg == b'BZ':
            alg = b'_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != b'UN'

    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def close(self):
        return self._stream.close()

    def _chunklength(self):
        d = readexactly(self._stream, 4)
        l = struct.unpack(b">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_(b"invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {b'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        # cg4 forward-compat
        sidedata = {}
        return (node, p1, p2, cs, deltabase, delta, flags, sidedata)

    def getchunks(self):
        """returns all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data; otherwise it
        would block on an sshrepo because it doesn't know where the stream
        ends.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                while pos < len(chunk):
                    next = pos + 2 ** 20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(
        self,
        repo,
        tr,
        srctype,
        url,
        targetphase=phases.draft,
        expectedtotal=None,
        sidedata_categories=None,
    ):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1

        `sidedata_categories` is an optional set of the sidedata categories
        wanted by the remote.
        """
        repo = repo.unfiltered()

        # Only useful if we're adding sidedata categories. If both peers have
        # the same categories, then we simply don't do anything.
        if self.version == b'04' and srctype == b'pull':
            sidedata_helpers = get_sidedata_helpers(
                repo,
                sidedata_categories or set(),
                pull=True,
            )
        else:
            sidedata_helpers = None

        def csmap(x):
            repo.ui.debug(b"add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if it exists)
            # in this function.
            srctype = tr.hookargs.setdefault(b'source', srctype)
            tr.hookargs.setdefault(b'url', url)
            repo.hook(
                b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
            )

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_(b"adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(
                _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
            )
            self.callback = progress.increment

            efilesset = set()
            duprevs = []

            def ondupchangelog(cl, rev):
                if rev < clstart:
                    duprevs.append(rev)

            def onchangelog(cl, rev):
                ctx = cl.changelogrevision(rev)
                efilesset.update(ctx.files)
                repo.register_changeset(rev, ctx)

            self.changelogheader()
            deltas = self.deltaiter()
            if not cl.addgroup(
                deltas,
                csmap,
                trp,
                alwayscache=True,
                addrevisioncb=onchangelog,
                duplicaterevisioncb=ondupchangelog,
            ):
                repo.ui.develwarn(
                    b'applied empty changelog from changegroup',
                    config=b'warn-empty-changegroup',
                )
            efiles = len(efilesset)
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            del deltas
            # TODO Python 2.7 removal
            # del efilesset
            efilesset = None
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_(b"adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(
                _(b'manifests'), unit=_(b'chunks'), total=changesets
            )
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool(b'server', b'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file nodes we must see
                    for f, n in pycompat.iteritems(mfest):
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_(b"adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles
            )

            # making sure the value exists
            tr.changes.setdefault(b'changegroup-count-changesets', 0)
            tr.changes.setdefault(b'changegroup-count-revisions', 0)
            tr.changes.setdefault(b'changegroup-count-files', 0)
            tr.changes.setdefault(b'changegroup-count-heads', 0)

            # Some code uses bundle operations for internal purposes and
            # usually sets `ui.quiet` to keep this out of user sight. Since
            # the report of such an operation now happens at the end of the
            # transaction, ui.quiet has no direct effect on the output.
            #
            # To preserve that intent we use an inelegant hack: we fail to
            # report the change if `quiet` is set. We should probably move to
            # something better, but this is a good first step to allow the
            # "end of transaction report" to pass tests.
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-changesets'] += changesets
                tr.changes[b'changegroup-count-revisions'] += newrevs
                tr.changes[b'changegroup-count-files'] += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads += len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1

            # see previous comment about checking ui.quiet
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-heads'] += deltaheads
            repo.invalidatevolatilesets()

            if changesets > 0:
                if b'node' not in tr.hookargs:
                    tr.hookargs[b'node'] = hex(cl.node(clstart))
                    tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs[b'node'] = hex(cl.node(clstart))
                    hookargs[b'node_last'] = hex(cl.node(clend - 1))
                repo.hook(
                    b'pretxnchangegroup',
                    throw=True,
                    **pycompat.strkwargs(hookargs)
                )

            added = pycompat.xrange(clstart, clend)
            phaseall = None
            if srctype in (b'push', b'serve'):
                # Old servers cannot push the boundary themselves.
                # New servers won't push the boundary if the changeset
                # already exists locally as secret.
                #
                # We should not use `added` here but the list of all changes
                # in the bundle.
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefore `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                if duprevs:
                    duprevs.extend(added)
                else:
                    duprevs = added
                phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
                duprevs = []

            if changesets > 0:

                def runhooks(unused_success):
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))

                    for rev in added:
                        args = hookargs.copy()
                        args[b'node'] = hex(cl.node(rev))
                        del args[b'node_last']
                        repo.hook(b"incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log(
                        b"incoming",
                        b"%d incoming changes - new heads: %s\n",
                        len(added),
                        b', '.join([hex(c[:6]) for c in newheads]),
                    )

                tr.addpostclose(
                    b'changegroup-runhooks-%020i' % clstart,
                    lambda tr: repo._afterlock(runhooks),
                )
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata)
            yield chunkdata
            chain = chunkdata[0]

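
def _example_cg1_deltabase():
    # Editor's sketch: the cg1 delta header carries no explicit delta base,
    # so cg1unpacker._deltaheader() infers one -- the first chunk deltas
    # against its p1, and each later chunk against the previous chunk's
    # node (the ``prevnode`` chain maintained by deltaiter()).
    unpacker = cg1unpacker.__new__(cg1unpacker)  # method is state-free
    node, p1, p2, cs = b'n' * 20, b'1' * 20, b'2' * 20, b'c' * 20
    header = (node, p1, p2, cs)
    assert unpacker._deltaheader(header, None)[3] == p1
    assert unpacker._deltaheader(header, b'p' * 20)[3] == b'p' * 20
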

class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """

    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'02'

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags


class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """

    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'03'
    _grouplistcount = 2  # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata[b"filename"]
            repo.ui.debug(b"adding %s revisions\n" % d)
            deltas = self.deltaiter()
            if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received dir revlog group is empty"))


class cg4unpacker(cg3unpacker):
    """Unpacker for cg4 streams.

    cg4 streams add support for exchanging sidedata.
    """

    version = b'04'

    def deltachunk(self, prevnode):
        res = super(cg4unpacker, self).deltachunk(prevnode)
        if not res:
            return res

        (node, p1, p2, cs, deltabase, delta, flags, _sidedata) = res

        sidedata_raw = getchunk(self._stream)
        sidedata = {}
        if len(sidedata_raw) > 0:
            sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)

        return node, p1, p2, cs, deltabase, delta, flags, sidedata


class headerlessfixup(object):
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if self._h:
            d, self._h = self._h[:n], self._h[n:]
            if len(d) < n:
                d += readexactly(self._fh, n - len(d))
            return d
        return readexactly(self._fh, n)


def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks."""

    # The captured revision delta may be encoded as a delta against
    # a base revision or as a full revision. The changegroup format
    # requires that everything on the wire be deltas. So for full
    # revisions, we need to invent a header that says to rewrite
    # data.

    if delta.delta is not None:
        prefix, data = b'', delta.delta
    elif delta.basenode == nullid:
        data = delta.revision
        prefix = mdiff.trivialdiffheader(len(data))
    else:
        data = delta.revision
        prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))

    meta = headerfn(delta)

    yield chunkheader(len(meta) + len(prefix) + len(data))
    yield meta
    if prefix:
        yield prefix
    yield data

    sidedata = delta.sidedata
    if sidedata is not None:
        # Need a separate chunk for sidedata to be able to differentiate
        # "raw delta" length and sidedata length
        yield chunkheader(len(sidedata))
        yield sidedata

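
def _example_fulltext_as_delta():
    # Editor's sketch of the full-revision case handled above: since the
    # wire format only carries deltas, a full revision is wrapped in a
    # synthetic diff header meaning "replace bytes 0..0 of the (empty)
    # base with this data". This assumes mdiff.trivialdiffheader() packs
    # three big-endian 32-bit fields (start, end, new length).
    data = b'full revision text'
    prefix = mdiff.trivialdiffheader(len(data))
    start, end, newlength = struct.unpack(b'>lll', prefix)
    assert (start, end, newlength) == (0, 0, len(data))
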

def _sortnodesellipsis(store, nodes, cl, lookup):
    """Sort nodes for changegroup generation."""
    # Ellipses serving mode.
    #
    # In a perfect world, we'd generate better ellipsis-ified graphs
    # for non-changelog revlogs. In practice, we haven't started doing
    # that yet, so the resulting DAGs for the manifestlog and filelogs
    # are actually full of bogus parentage on all the ellipsis
    # nodes. This has the side effect that, while the contents are
    # correct, the individual DAGs might be completely out of whack in
    # a case like 882681bc3166 and its ancestors (back about 10
    # revisions or so) in the main hg repo.
    #
    # The one invariant we *know* holds is that the new (potentially
    # bogus) DAG shape will be valid if we order the nodes in the
    # order that they're introduced in dramatis personae by the
    # changelog, so what we do is we sort the non-changelog histories
    # by the order in which they are used by the changelog.
    key = lambda n: cl.rev(lookup(n))
    return sorted(nodes, key=key)

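
def _example_ellipsis_sort():
    # Editor's sketch: _sortnodesellipsis() orders nodes by the changelog
    # revision of the changeset each node is linked to. The stand-in
    # changelog and identity lookup below are invented for illustration;
    # the store argument is unused by the sort itself.
    class _fakecl(object):
        def rev(self, node):
            return {b'a': 2, b'b': 0, b'c': 1}[node]

    nodes = [b'a', b'b', b'c']
    result = _sortnodesellipsis(None, nodes, _fakecl(), lambda n: n)
    assert result == [b'b', b'c', b'a']
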

def _resolvenarrowrevisioninfo(
    cl,
    store,
    ischangelog,
    rev,
    linkrev,
    linknode,
    clrevtolocalrev,
    fullclnodes,
    precomputedellipsis,
):
    linkparents = precomputedellipsis[linkrev]

    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend(
                    [pp for pp in precomputedellipsis[p] if pp != nullrev]
                )
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    b"unable to resolve parent while packing '%s' %r"
                    b' for changeset %r' % (store.indexfile, rev, clrev)
                )

        return nullrev

    if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        (p1,) = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode
738
753
739
754
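The breadth-first walk in `local()` is the heart of the narrow-ellipsis mapping, so here is a self-contained sketch of it. The dicts `parents` and `ellipsis_parents` are stand-ins for `cl.parentrevs` and `precomputedellipsis`; the incremental-pull fallback is collapsed into a plain return:

    from collections import deque

    def resolve_local(clrev, clrevtolocalrev, parents, ellipsis_parents, nullrev=-1):
        # Walk outward from clrev until we hit a changeset that is linked
        # from the current revlog (i.e. present in clrevtolocalrev).
        walk = deque([clrev])
        while walk:
            p = walk.popleft()
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in parents:            # full changeset: follow real parents
                walk.extend(pp for pp in parents[p] if pp != nullrev)
            elif p in ellipsis_parents:   # ellipsis node: follow synthetic parents
                walk.extend(pp for pp in ellipsis_parents[p] if pp != nullrev)
            else:                         # parent outside the bundle
                return nullrev

    # Chain 5 -> 3 -> 1 where only changeset 1 touched this revlog:
    assert resolve_local(5, {1: 0}, {5: (3, -1)}, {3: (1, -1)}) == 0

(Using `collections.deque` keeps the pop-from-the-front step O(1); the `walk = walk[1:]` slicing in the real code is quadratic in the worst case, though the walks involved are short.)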
740 def deltagroup(
755 def deltagroup(
741 repo,
756 repo,
742 store,
757 store,
743 nodes,
758 nodes,
744 ischangelog,
759 ischangelog,
745 lookup,
760 lookup,
746 forcedeltaparentprev,
761 forcedeltaparentprev,
747 topic=None,
762 topic=None,
748 ellipses=False,
763 ellipses=False,
749 clrevtolocalrev=None,
764 clrevtolocalrev=None,
750 fullclnodes=None,
765 fullclnodes=None,
751 precomputedellipsis=None,
766 precomputedellipsis=None,
767 sidedata_helpers=None,
752 ):
768 ):
753 """Calculate deltas for a set of revisions.
769 """Calculate deltas for a set of revisions.
754
770
755 Is a generator of ``revisiondelta`` instances.
771 Is a generator of ``revisiondelta`` instances.
756
772
757 If topic is not None, progress detail will be generated using this
773 If topic is not None, progress detail will be generated using this
758 topic name (e.g. changesets, manifests, etc).
774 topic name (e.g. changesets, manifests, etc).
775
776 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
759 """
777 """
760 if not nodes:
778 if not nodes:
761 return
779 return
762
780
763 cl = repo.changelog
781 cl = repo.changelog
764
782
765 if ischangelog:
783 if ischangelog:
766 # `hg log` shows changesets in storage order. To preserve order
784 # `hg log` shows changesets in storage order. To preserve order
767 # across clones, send out changesets in storage order.
785 # across clones, send out changesets in storage order.
768 nodesorder = b'storage'
786 nodesorder = b'storage'
769 elif ellipses:
787 elif ellipses:
770 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
788 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
771 nodesorder = b'nodes'
789 nodesorder = b'nodes'
772 else:
790 else:
773 nodesorder = None
791 nodesorder = None
774
792
775 # Perform ellipses filtering and revision massaging. We do this before
793 # Perform ellipses filtering and revision massaging. We do this before
776 # emitrevisions() because a) filtering out revisions creates less work
794 # emitrevisions() because a) filtering out revisions creates less work
777 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
795 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
778 # assumptions about delta choices and we would possibly send a delta
796 # assumptions about delta choices and we would possibly send a delta
779 # referencing a missing base revision.
797 # referencing a missing base revision.
780 #
798 #
781 # Also, calling lookup() has side-effects with regards to populating
799 # Also, calling lookup() has side-effects with regards to populating
782 # data structures. If we don't call lookup() for each node or if we call
800 # data structures. If we don't call lookup() for each node or if we call
783 # lookup() after the first pass through each node, things can break -
801 # lookup() after the first pass through each node, things can break -
784 # possibly intermittently depending on the python hash seed! For that
802 # possibly intermittently depending on the python hash seed! For that
785 # reason, we store a mapping of all linknodes during the initial node
803 # reason, we store a mapping of all linknodes during the initial node
786 # pass rather than use lookup() on the output side.
804 # pass rather than use lookup() on the output side.
787 if ellipses:
805 if ellipses:
788 filtered = []
806 filtered = []
789 adjustedparents = {}
807 adjustedparents = {}
790 linknodes = {}
808 linknodes = {}
791
809
792 for node in nodes:
810 for node in nodes:
793 rev = store.rev(node)
811 rev = store.rev(node)
794 linknode = lookup(node)
812 linknode = lookup(node)
795 linkrev = cl.rev(linknode)
813 linkrev = cl.rev(linknode)
796 clrevtolocalrev[linkrev] = rev
814 clrevtolocalrev[linkrev] = rev
797
815
798 # If linknode is in fullclnodes, it means the corresponding
816 # If linknode is in fullclnodes, it means the corresponding
799 # changeset was a full changeset and is being sent unaltered.
817 # changeset was a full changeset and is being sent unaltered.
800 if linknode in fullclnodes:
818 if linknode in fullclnodes:
801 linknodes[node] = linknode
819 linknodes[node] = linknode
802
820
803 # If the corresponding changeset wasn't in the set computed
821 # If the corresponding changeset wasn't in the set computed
804 # as relevant to us, it should be dropped outright.
822 # as relevant to us, it should be dropped outright.
805 elif linkrev not in precomputedellipsis:
823 elif linkrev not in precomputedellipsis:
806 continue
824 continue
807
825
808 else:
826 else:
809 # We could probably do this later and avoid the dict
827 # We could probably do this later and avoid the dict
810 # holding state. But it likely doesn't matter.
828 # holding state. But it likely doesn't matter.
811 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
829 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
812 cl,
830 cl,
813 store,
831 store,
814 ischangelog,
832 ischangelog,
815 rev,
833 rev,
816 linkrev,
834 linkrev,
817 linknode,
835 linknode,
818 clrevtolocalrev,
836 clrevtolocalrev,
819 fullclnodes,
837 fullclnodes,
820 precomputedellipsis,
838 precomputedellipsis,
821 )
839 )
822
840
823 adjustedparents[node] = (p1node, p2node)
841 adjustedparents[node] = (p1node, p2node)
824 linknodes[node] = linknode
842 linknodes[node] = linknode
825
843
826 filtered.append(node)
844 filtered.append(node)
827
845
828 nodes = filtered
846 nodes = filtered
829
847
830 # We expect the first pass to be fast, so we only engage the progress
848 # We expect the first pass to be fast, so we only engage the progress
831 # meter for constructing the revision deltas.
849 # meter for constructing the revision deltas.
832 progress = None
850 progress = None
833 if topic is not None:
851 if topic is not None:
834 progress = repo.ui.makeprogress(
852 progress = repo.ui.makeprogress(
835 topic, unit=_(b'chunks'), total=len(nodes)
853 topic, unit=_(b'chunks'), total=len(nodes)
836 )
854 )
837
855
838 configtarget = repo.ui.config(b'devel', b'bundle.delta')
856 configtarget = repo.ui.config(b'devel', b'bundle.delta')
839 if configtarget not in (b'', b'p1', b'full'):
857 if configtarget not in (b'', b'p1', b'full'):
840 msg = _("""config "devel.bundle.delta" as unknown value: %s""")
858 msg = _("""config "devel.bundle.delta" as unknown value: %s""")
841 repo.ui.warn(msg % configtarget)
859 repo.ui.warn(msg % configtarget)
842
860
843 deltamode = repository.CG_DELTAMODE_STD
861 deltamode = repository.CG_DELTAMODE_STD
844 if forcedeltaparentprev:
862 if forcedeltaparentprev:
845 deltamode = repository.CG_DELTAMODE_PREV
863 deltamode = repository.CG_DELTAMODE_PREV
846 elif configtarget == b'p1':
864 elif configtarget == b'p1':
847 deltamode = repository.CG_DELTAMODE_P1
865 deltamode = repository.CG_DELTAMODE_P1
848 elif configtarget == b'full':
866 elif configtarget == b'full':
849 deltamode = repository.CG_DELTAMODE_FULL
867 deltamode = repository.CG_DELTAMODE_FULL
850
868
851 revisions = store.emitrevisions(
869 revisions = store.emitrevisions(
852 nodes,
870 nodes,
853 nodesorder=nodesorder,
871 nodesorder=nodesorder,
854 revisiondata=True,
872 revisiondata=True,
855 assumehaveparentrevisions=not ellipses,
873 assumehaveparentrevisions=not ellipses,
856 deltamode=deltamode,
874 deltamode=deltamode,
875 sidedata_helpers=sidedata_helpers,
857 )
876 )
858
877
859 for i, revision in enumerate(revisions):
878 for i, revision in enumerate(revisions):
860 if progress:
879 if progress:
861 progress.update(i + 1)
880 progress.update(i + 1)
862
881
863 if ellipses:
882 if ellipses:
864 linknode = linknodes[revision.node]
883 linknode = linknodes[revision.node]
865
884
866 if revision.node in adjustedparents:
885 if revision.node in adjustedparents:
867 p1node, p2node = adjustedparents[revision.node]
886 p1node, p2node = adjustedparents[revision.node]
868 revision.p1node = p1node
887 revision.p1node = p1node
869 revision.p2node = p2node
888 revision.p2node = p2node
870 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
889 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
871
890
872 else:
891 else:
873 linknode = lookup(revision.node)
892 linknode = lookup(revision.node)
874
893
875 revision.linknode = linknode
894 revision.linknode = linknode
876 yield revision
895 yield revision
877
896
878 if progress:
897 if progress:
879 progress.complete()
898 progress.complete()
880
899
881
900
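One structural point in `deltagroup` deserves a distilled example: every `lookup()` call happens in a first, eager pass, so its side effects are complete before any delta is yielded. A toy version of that collect-then-emit shape (all names invented):

    def delta_group_sketch(nodes, lookup, emit):
        # Pass 1 (eager): lookup every node now; side effects such as
        # recording linknodes must finish before emission starts.
        linknodes = {n: lookup(n) for n in nodes}

        def gen():
            # Pass 2 (lazy): attach the precomputed linknode to each delta.
            for delta in emit(nodes):
                delta['linknode'] = linknodes[delta['node']]
                yield delta

        return gen()

    calls = []
    deltas = delta_group_sketch(
        ['a', 'b'],
        lambda n: calls.append(n) or 'link-%s' % n,
        lambda nodes: ({'node': n} for n in nodes),
    )
    assert calls == ['a', 'b']  # lookups already happened
    assert [d['linknode'] for d in deltas] == ['link-a', 'link-b']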
882 class cgpacker(object):
901 class cgpacker(object):
883 def __init__(
902 def __init__(
884 self,
903 self,
885 repo,
904 repo,
886 oldmatcher,
905 oldmatcher,
887 matcher,
906 matcher,
888 version,
907 version,
889 builddeltaheader,
908 builddeltaheader,
890 manifestsend,
909 manifestsend,
891 forcedeltaparentprev=False,
910 forcedeltaparentprev=False,
892 bundlecaps=None,
911 bundlecaps=None,
893 ellipses=False,
912 ellipses=False,
894 shallow=False,
913 shallow=False,
895 ellipsisroots=None,
914 ellipsisroots=None,
896 fullnodes=None,
915 fullnodes=None,
897 remote_sidedata=None,
916 remote_sidedata=None,
898 ):
917 ):
899 """Given a source repo, construct a bundler.
918 """Given a source repo, construct a bundler.
900
919
901 oldmatcher is a matcher that matches on files the client already has.
920 oldmatcher is a matcher that matches on files the client already has.
902 These will not be included in the changegroup.
921 These will not be included in the changegroup.
903
922
904 matcher is a matcher that matches on files to include in the
923 matcher is a matcher that matches on files to include in the
905 changegroup. Used to facilitate sparse changegroups.
924 changegroup. Used to facilitate sparse changegroups.
906
925
907 forcedeltaparentprev indicates whether delta parents must be against
926 forcedeltaparentprev indicates whether delta parents must be against
908 the previous revision in a delta group. This should only be used for
927 the previous revision in a delta group. This should only be used for
909 compatibility with changegroup version 1.
928 compatibility with changegroup version 1.
910
929
911 builddeltaheader is a callable that constructs the header for a group
930 builddeltaheader is a callable that constructs the header for a group
912 delta.
931 delta.
913
932
914 manifestsend is a chunk to send after manifests have been fully emitted.
933 manifestsend is a chunk to send after manifests have been fully emitted.
915
934
916 ellipses indicates whether ellipsis serving mode is enabled.
935 ellipses indicates whether ellipsis serving mode is enabled.
917
936
918 bundlecaps is optional and can be used to specify the set of
937 bundlecaps is optional and can be used to specify the set of
919 capabilities which can be used to build the bundle. While bundlecaps is
938 capabilities which can be used to build the bundle. While bundlecaps is
920 unused in core Mercurial, extensions rely on this feature to communicate
939 unused in core Mercurial, extensions rely on this feature to communicate
921 capabilities to customize the changegroup packer.
940 capabilities to customize the changegroup packer.
922
941
923 shallow indicates whether shallow data might be sent. The packer may
942 shallow indicates whether shallow data might be sent. The packer may
924 need to pack file contents not introduced by the changes being packed.
943 need to pack file contents not introduced by the changes being packed.
925
944
926 fullnodes is the set of changelog nodes which should not be ellipsis
945 fullnodes is the set of changelog nodes which should not be ellipsis
927 nodes. We store this rather than the set of nodes that should be
946 nodes. We store this rather than the set of nodes that should be
928 ellipsis because for very large histories we expect this to be
947 ellipsis because for very large histories we expect this to be
929 significantly smaller.
948 significantly smaller.
930
949
931 remote_sidedata is the set of sidedata categories wanted by the remote.
950 remote_sidedata is the set of sidedata categories wanted by the remote.
932 """
951 """
933 assert oldmatcher
952 assert oldmatcher
934 assert matcher
953 assert matcher
935 self._oldmatcher = oldmatcher
954 self._oldmatcher = oldmatcher
936 self._matcher = matcher
955 self._matcher = matcher
937
956
938 self.version = version
957 self.version = version
939 self._forcedeltaparentprev = forcedeltaparentprev
958 self._forcedeltaparentprev = forcedeltaparentprev
940 self._builddeltaheader = builddeltaheader
959 self._builddeltaheader = builddeltaheader
941 self._manifestsend = manifestsend
960 self._manifestsend = manifestsend
942 self._ellipses = ellipses
961 self._ellipses = ellipses
943
962
944 # Set of capabilities we can use to build the bundle.
963 # Set of capabilities we can use to build the bundle.
945 if bundlecaps is None:
964 if bundlecaps is None:
946 bundlecaps = set()
965 bundlecaps = set()
947 self._bundlecaps = bundlecaps
966 self._bundlecaps = bundlecaps
948 if remote_sidedata is None:
967 if remote_sidedata is None:
949 remote_sidedata = set()
968 remote_sidedata = set()
950 self._remote_sidedata = remote_sidedata
969 self._remote_sidedata = remote_sidedata
951 self._isshallow = shallow
970 self._isshallow = shallow
952 self._fullclnodes = fullnodes
971 self._fullclnodes = fullnodes
953
972
954 # Maps ellipsis revs to their roots at the changelog level.
973 # Maps ellipsis revs to their roots at the changelog level.
955 self._precomputedellipsis = ellipsisroots
974 self._precomputedellipsis = ellipsisroots
956
975
957 self._repo = repo
976 self._repo = repo
958
977
959 if self._repo.ui.verbose and not self._repo.ui.debugflag:
978 if self._repo.ui.verbose and not self._repo.ui.debugflag:
960 self._verbosenote = self._repo.ui.note
979 self._verbosenote = self._repo.ui.note
961 else:
980 else:
962 self._verbosenote = lambda s: None
981 self._verbosenote = lambda s: None
963
982
964 def generate(
983 def generate(
965 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
984 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
966 ):
985 ):
967 """Yield a sequence of changegroup byte chunks.
986 """Yield a sequence of changegroup byte chunks.
968 If changelog is False, changelog data won't be added to changegroup
987 If changelog is False, changelog data won't be added to changegroup
969 """
988 """
970
989
971 repo = self._repo
990 repo = self._repo
972 cl = repo.changelog
991 cl = repo.changelog
973
992
974 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
993 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
975 size = 0
994 size = 0
976
995
996 sidedata_helpers = None
997 if self.version == b'04':
998 remote_sidedata = self._remote_sidedata
999 if source == b'strip':
1000 # We're our own remote when stripping, get the no-op helpers
1001 # TODO a better approach would be for the strip bundle to
1002 # correctly advertise its sidedata categories directly.
1003 remote_sidedata = repo._wanted_sidedata
1004 sidedata_helpers = get_sidedata_helpers(repo, remote_sidedata)
1005
977 clstate, deltas = self._generatechangelog(
1006 clstate, deltas = self._generatechangelog(
978 cl, clnodes, generate=changelog
1007 cl,
1008 clnodes,
1009 generate=changelog,
1010 sidedata_helpers=sidedata_helpers,
979 )
1011 )
980 for delta in deltas:
1012 for delta in deltas:
981 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
1013 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
982 size += len(chunk)
1014 size += len(chunk)
983 yield chunk
1015 yield chunk
984
1016
985 close = closechunk()
1017 close = closechunk()
986 size += len(close)
1018 size += len(close)
987 yield close
1019 yield close
988
1020
989 self._verbosenote(_(b'%8.i (changelog)\n') % size)
1021 self._verbosenote(_(b'%8.i (changelog)\n') % size)
990
1022
991 clrevorder = clstate[b'clrevorder']
1023 clrevorder = clstate[b'clrevorder']
992 manifests = clstate[b'manifests']
1024 manifests = clstate[b'manifests']
993 changedfiles = clstate[b'changedfiles']
1025 changedfiles = clstate[b'changedfiles']
994
1026
995 # We need to make sure that the linkrev in the changegroup refers to
1027 # We need to make sure that the linkrev in the changegroup refers to
996 # the first changeset that introduced the manifest or file revision.
1028 # the first changeset that introduced the manifest or file revision.
997 # The fastpath is usually safer than the slowpath, because the filelogs
1029 # The fastpath is usually safer than the slowpath, because the filelogs
998 # are walked in revlog order.
1030 # are walked in revlog order.
999 #
1031 #
1000 # When taking the slowpath when the manifest revlog uses generaldelta,
1032 # When taking the slowpath when the manifest revlog uses generaldelta,
1001 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1033 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1002 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1034 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1003 #
1035 #
1004 # When taking the fastpath, we are only vulnerable to reordering
1036 # When taking the fastpath, we are only vulnerable to reordering
1005 # of the changelog itself. The changelog never uses generaldelta and is
1037 # of the changelog itself. The changelog never uses generaldelta and is
1006 # never reordered. To handle this case, we simply take the slowpath,
1038 # never reordered. To handle this case, we simply take the slowpath,
1007 # which already has the 'clrevorder' logic. This was also fixed in
1039 # which already has the 'clrevorder' logic. This was also fixed in
1008 # cc0ff93d0c0c.
1040 # cc0ff93d0c0c.
1009
1041
1010 # Treemanifests don't work correctly with fastpathlinkrev
1042 # Treemanifests don't work correctly with fastpathlinkrev
1011 # either, because we don't discover which directory nodes to
1043 # either, because we don't discover which directory nodes to
1012 # send along with files. This could probably be fixed.
1044 # send along with files. This could probably be fixed.
1013 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1045 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1014
1046
1015 fnodes = {} # needed file nodes
1047 fnodes = {} # needed file nodes
1016
1048
1017 size = 0
1049 size = 0
1018 it = self.generatemanifests(
1050 it = self.generatemanifests(
1019 commonrevs,
1051 commonrevs,
1020 clrevorder,
1052 clrevorder,
1021 fastpathlinkrev,
1053 fastpathlinkrev,
1022 manifests,
1054 manifests,
1023 fnodes,
1055 fnodes,
1024 source,
1056 source,
1025 clstate[b'clrevtomanifestrev'],
1057 clstate[b'clrevtomanifestrev'],
1058 sidedata_helpers=sidedata_helpers,
1026 )
1059 )
1027
1060
1028 for tree, deltas in it:
1061 for tree, deltas in it:
1029 if tree:
1062 if tree:
1030 assert self.version in (b'03', b'04')
1063 assert self.version in (b'03', b'04')
1031 chunk = _fileheader(tree)
1064 chunk = _fileheader(tree)
1032 size += len(chunk)
1065 size += len(chunk)
1033 yield chunk
1066 yield chunk
1034
1067
1035 for delta in deltas:
1068 for delta in deltas:
1036 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1069 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1037 for chunk in chunks:
1070 for chunk in chunks:
1038 size += len(chunk)
1071 size += len(chunk)
1039 yield chunk
1072 yield chunk
1040
1073
1041 close = closechunk()
1074 close = closechunk()
1042 size += len(close)
1075 size += len(close)
1043 yield close
1076 yield close
1044
1077
1045 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1078 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1046 yield self._manifestsend
1079 yield self._manifestsend
1047
1080
1048 mfdicts = None
1081 mfdicts = None
1049 if self._ellipses and self._isshallow:
1082 if self._ellipses and self._isshallow:
1050 mfdicts = [
1083 mfdicts = [
1051 (repo.manifestlog[n].read(), lr)
1084 (repo.manifestlog[n].read(), lr)
1052 for (n, lr) in pycompat.iteritems(manifests)
1085 for (n, lr) in pycompat.iteritems(manifests)
1053 ]
1086 ]
1054
1087
1055 manifests.clear()
1088 manifests.clear()
1056 clrevs = {cl.rev(x) for x in clnodes}
1089 clrevs = {cl.rev(x) for x in clnodes}
1057
1090
1058 it = self.generatefiles(
1091 it = self.generatefiles(
1059 changedfiles,
1092 changedfiles,
1060 commonrevs,
1093 commonrevs,
1061 source,
1094 source,
1062 mfdicts,
1095 mfdicts,
1063 fastpathlinkrev,
1096 fastpathlinkrev,
1064 fnodes,
1097 fnodes,
1065 clrevs,
1098 clrevs,
1099 sidedata_helpers=sidedata_helpers,
1066 )
1100 )
1067
1101
1068 for path, deltas in it:
1102 for path, deltas in it:
1069 h = _fileheader(path)
1103 h = _fileheader(path)
1070 size = len(h)
1104 size = len(h)
1071 yield h
1105 yield h
1072
1106
1073 for delta in deltas:
1107 for delta in deltas:
1074 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1108 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1075 for chunk in chunks:
1109 for chunk in chunks:
1076 size += len(chunk)
1110 size += len(chunk)
1077 yield chunk
1111 yield chunk
1078
1112
1079 close = closechunk()
1113 close = closechunk()
1080 size += len(close)
1114 size += len(close)
1081 yield close
1115 yield close
1082
1116
1083 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1117 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1084
1118
1085 yield closechunk()
1119 yield closechunk()
1086
1120
1087 if clnodes:
1121 if clnodes:
1088 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1122 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1089
1123
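`get_sidedata_helpers` and `repo._wanted_sidedata` are used in `generate()` above without being defined in this file. As a rough mental model only (an assumption for illustration, not Mercurial's actual helper structure), you can picture the helpers as the subset of per-category compute functions that the remote asked for, built once and threaded through every `deltagroup` call:

    def sidedata_helpers_sketch(computers, remote_categories):
        # Keep only the sidedata categories the remote advertised interest in.
        return {cat: fn for cat, fn in computers.items() if cat in remote_categories}

    computers = {
        b'copies': lambda rev: {b'copied-from': b'...'},
        b'extra': lambda rev: {},
    }
    helpers = sidedata_helpers_sketch(computers, {b'copies'})
    assert set(helpers) == {b'copies'}

The `source == b'strip'` special case then reads naturally: when stripping, the repo is its own remote, so it asks for exactly the categories it already wants (`repo._wanted_sidedata`).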
1090 def _generatechangelog(self, cl, nodes, generate=True):
1124 def _generatechangelog(
1125 self, cl, nodes, generate=True, sidedata_helpers=None
1126 ):
1091 """Generate data for changelog chunks.
1127 """Generate data for changelog chunks.
1092
1128
1093 Returns a 2-tuple of a dict containing state and an iterable of
1129 Returns a 2-tuple of a dict containing state and an iterable of
1094 byte chunks. The state will not be fully populated until the
1130 byte chunks. The state will not be fully populated until the
1095 chunk stream has been fully consumed.
1131 chunk stream has been fully consumed.
1096
1132
1097 If generate is False, the state will be fully populated and no chunk
1133 If generate is False, the state will be fully populated and no chunk
1098 stream will be yielded.
1134 stream will be yielded.
1135
1136 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
1099 """
1137 """
1100 clrevorder = {}
1138 clrevorder = {}
1101 manifests = {}
1139 manifests = {}
1102 mfl = self._repo.manifestlog
1140 mfl = self._repo.manifestlog
1103 changedfiles = set()
1141 changedfiles = set()
1104 clrevtomanifestrev = {}
1142 clrevtomanifestrev = {}
1105
1143
1106 state = {
1144 state = {
1107 b'clrevorder': clrevorder,
1145 b'clrevorder': clrevorder,
1108 b'manifests': manifests,
1146 b'manifests': manifests,
1109 b'changedfiles': changedfiles,
1147 b'changedfiles': changedfiles,
1110 b'clrevtomanifestrev': clrevtomanifestrev,
1148 b'clrevtomanifestrev': clrevtomanifestrev,
1111 }
1149 }
1112
1150
1113 if not (generate or self._ellipses):
1151 if not (generate or self._ellipses):
1114 # sort the nodes in storage order
1152 # sort the nodes in storage order
1115 nodes = sorted(nodes, key=cl.rev)
1153 nodes = sorted(nodes, key=cl.rev)
1116 for node in nodes:
1154 for node in nodes:
1117 c = cl.changelogrevision(node)
1155 c = cl.changelogrevision(node)
1118 clrevorder[node] = len(clrevorder)
1156 clrevorder[node] = len(clrevorder)
1119 # record the first changeset introducing this manifest version
1157 # record the first changeset introducing this manifest version
1120 manifests.setdefault(c.manifest, node)
1158 manifests.setdefault(c.manifest, node)
1121 # Record a complete list of potentially-changed files in
1159 # Record a complete list of potentially-changed files in
1122 # this manifest.
1160 # this manifest.
1123 changedfiles.update(c.files)
1161 changedfiles.update(c.files)
1124
1162
1125 return state, ()
1163 return state, ()
1126
1164
1127 # Callback for the changelog, used to collect changed files and
1165 # Callback for the changelog, used to collect changed files and
1128 # manifest nodes.
1166 # manifest nodes.
1129 # Returns the linkrev node (identity in the changelog case).
1167 # Returns the linkrev node (identity in the changelog case).
1130 def lookupcl(x):
1168 def lookupcl(x):
1131 c = cl.changelogrevision(x)
1169 c = cl.changelogrevision(x)
1132 clrevorder[x] = len(clrevorder)
1170 clrevorder[x] = len(clrevorder)
1133
1171
1134 if self._ellipses:
1172 if self._ellipses:
1135 # Only update manifests if x is going to be sent. Otherwise we
1173 # Only update manifests if x is going to be sent. Otherwise we
1136 # end up with bogus linkrevs specified for manifests and
1174 # end up with bogus linkrevs specified for manifests and
1137 # we skip some manifest nodes that we should otherwise
1175 # we skip some manifest nodes that we should otherwise
1138 # have sent.
1176 # have sent.
1139 if (
1177 if (
1140 x in self._fullclnodes
1178 x in self._fullclnodes
1141 or cl.rev(x) in self._precomputedellipsis
1179 or cl.rev(x) in self._precomputedellipsis
1142 ):
1180 ):
1143
1181
1144 manifestnode = c.manifest
1182 manifestnode = c.manifest
1145 # Record the first changeset introducing this manifest
1183 # Record the first changeset introducing this manifest
1146 # version.
1184 # version.
1147 manifests.setdefault(manifestnode, x)
1185 manifests.setdefault(manifestnode, x)
1148 # Set this narrow-specific dict so we have the lowest
1186 # Set this narrow-specific dict so we have the lowest
1149 # manifest revnum to look up for this cl revnum. (Part of
1187 # manifest revnum to look up for this cl revnum. (Part of
1150 # mapping changelog ellipsis parents to manifest ellipsis
1188 # mapping changelog ellipsis parents to manifest ellipsis
1151 # parents)
1189 # parents)
1152 clrevtomanifestrev.setdefault(
1190 clrevtomanifestrev.setdefault(
1153 cl.rev(x), mfl.rev(manifestnode)
1191 cl.rev(x), mfl.rev(manifestnode)
1154 )
1192 )
1155 # We can't trust the changed files list in the changeset if the
1193 # We can't trust the changed files list in the changeset if the
1156 # client requested a shallow clone.
1194 # client requested a shallow clone.
1157 if self._isshallow:
1195 if self._isshallow:
1158 changedfiles.update(mfl[c.manifest].read().keys())
1196 changedfiles.update(mfl[c.manifest].read().keys())
1159 else:
1197 else:
1160 changedfiles.update(c.files)
1198 changedfiles.update(c.files)
1161 else:
1199 else:
1162 # record the first changeset introducing this manifest version
1200 # record the first changeset introducing this manifest version
1163 manifests.setdefault(c.manifest, x)
1201 manifests.setdefault(c.manifest, x)
1164 # Record a complete list of potentially-changed files in
1202 # Record a complete list of potentially-changed files in
1165 # this manifest.
1203 # this manifest.
1166 changedfiles.update(c.files)
1204 changedfiles.update(c.files)
1167
1205
1168 return x
1206 return x
1169
1207
1170 gen = deltagroup(
1208 gen = deltagroup(
1171 self._repo,
1209 self._repo,
1172 cl,
1210 cl,
1173 nodes,
1211 nodes,
1174 True,
1212 True,
1175 lookupcl,
1213 lookupcl,
1176 self._forcedeltaparentprev,
1214 self._forcedeltaparentprev,
1177 ellipses=self._ellipses,
1215 ellipses=self._ellipses,
1178 topic=_(b'changesets'),
1216 topic=_(b'changesets'),
1179 clrevtolocalrev={},
1217 clrevtolocalrev={},
1180 fullclnodes=self._fullclnodes,
1218 fullclnodes=self._fullclnodes,
1181 precomputedellipsis=self._precomputedellipsis,
1219 precomputedellipsis=self._precomputedellipsis,
1220 sidedata_helpers=sidedata_helpers,
1182 )
1221 )
1183
1222
1184 return state, gen
1223 return state, gen
1185
1224
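The contract documented in `_generatechangelog` (state that only becomes complete once the returned generator is consumed) is a recurring Mercurial pattern worth a tiny demonstration (names invented):

    def generate_with_state(nodes):
        state = {'order': {}}

        def gen():
            for node in nodes:
                state['order'][node] = len(state['order'])  # side effect
                yield node

        return state, gen()

    state, stream = generate_with_state(['n1', 'n2'])
    assert state['order'] == {}                  # not populated yet
    list(stream)                                 # fully consume the stream
    assert state['order'] == {'n1': 0, 'n2': 1}  # now it is

This is also why the `else` branch at the end of `generatemanifests` below drains its `deltas` generator even when nothing will be sent: the side effects on shared state still have to happen.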
1186 def generatemanifests(
1225 def generatemanifests(
1187 self,
1226 self,
1188 commonrevs,
1227 commonrevs,
1189 clrevorder,
1228 clrevorder,
1190 fastpathlinkrev,
1229 fastpathlinkrev,
1191 manifests,
1230 manifests,
1192 fnodes,
1231 fnodes,
1193 source,
1232 source,
1194 clrevtolocalrev,
1233 clrevtolocalrev,
1234 sidedata_helpers=None,
1195 ):
1235 ):
1196 """Returns an iterator of changegroup chunks containing manifests.
1236 """Returns an iterator of changegroup chunks containing manifests.
1197
1237
1198 `source` is unused here, but is used by extensions like remotefilelog to
1238 `source` is unused here, but is used by extensions like remotefilelog to
1199 change what is sent based in pulls vs pushes, etc.
1199 change what is sent based on pulls vs pushes, etc.
1239 change what is sent based on pulls vs pushes, etc.
1241 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
1200 """
1242 """
1201 repo = self._repo
1243 repo = self._repo
1202 mfl = repo.manifestlog
1244 mfl = repo.manifestlog
1203 tmfnodes = {b'': manifests}
1245 tmfnodes = {b'': manifests}
1204
1246
1205 # Callback for the manifest, used to collect linkrevs for filelog
1247 # Callback for the manifest, used to collect linkrevs for filelog
1206 # revisions.
1248 # revisions.
1207 # Returns the linkrev node (collected in lookupcl).
1249 # Returns the linkrev node (collected in lookupcl).
1208 def makelookupmflinknode(tree, nodes):
1250 def makelookupmflinknode(tree, nodes):
1209 if fastpathlinkrev:
1251 if fastpathlinkrev:
1210 assert not tree
1252 assert not tree
1211 return (
1253 return (
1212 manifests.__getitem__
1254 manifests.__getitem__
1213 ) # pytype: disable=unsupported-operands
1255 ) # pytype: disable=unsupported-operands
1214
1256
1215 def lookupmflinknode(x):
1257 def lookupmflinknode(x):
1216 """Callback for looking up the linknode for manifests.
1258 """Callback for looking up the linknode for manifests.
1217
1259
1218 Returns the linkrev node for the specified manifest.
1260 Returns the linkrev node for the specified manifest.
1219
1261
1220 SIDE EFFECT:
1262 SIDE EFFECT:
1221
1263
1222 1) fclnodes gets populated with the list of relevant
1264 1) fclnodes gets populated with the list of relevant
1223 file nodes if we're not using fastpathlinkrev
1265 file nodes if we're not using fastpathlinkrev
1224 2) When treemanifests are in use, collects treemanifest nodes
1266 2) When treemanifests are in use, collects treemanifest nodes
1225 to send
1267 to send
1226
1268
1227 Note that this means manifests must be completely sent to
1269 Note that this means manifests must be completely sent to
1228 the client before you can trust the list of files and
1270 the client before you can trust the list of files and
1229 treemanifests to send.
1271 treemanifests to send.
1230 """
1272 """
1231 clnode = nodes[x]
1273 clnode = nodes[x]
1232 mdata = mfl.get(tree, x).readfast(shallow=True)
1274 mdata = mfl.get(tree, x).readfast(shallow=True)
1233 for p, n, fl in mdata.iterentries():
1275 for p, n, fl in mdata.iterentries():
1234 if fl == b't': # subdirectory manifest
1276 if fl == b't': # subdirectory manifest
1235 subtree = tree + p + b'/'
1277 subtree = tree + p + b'/'
1236 tmfclnodes = tmfnodes.setdefault(subtree, {})
1278 tmfclnodes = tmfnodes.setdefault(subtree, {})
1237 tmfclnode = tmfclnodes.setdefault(n, clnode)
1279 tmfclnode = tmfclnodes.setdefault(n, clnode)
1238 if clrevorder[clnode] < clrevorder[tmfclnode]:
1280 if clrevorder[clnode] < clrevorder[tmfclnode]:
1239 tmfclnodes[n] = clnode
1281 tmfclnodes[n] = clnode
1240 else:
1282 else:
1241 f = tree + p
1283 f = tree + p
1242 fclnodes = fnodes.setdefault(f, {})
1284 fclnodes = fnodes.setdefault(f, {})
1243 fclnode = fclnodes.setdefault(n, clnode)
1285 fclnode = fclnodes.setdefault(n, clnode)
1244 if clrevorder[clnode] < clrevorder[fclnode]:
1286 if clrevorder[clnode] < clrevorder[fclnode]:
1245 fclnodes[n] = clnode
1287 fclnodes[n] = clnode
1246 return clnode
1288 return clnode
1247
1289
1248 return lookupmflinknode
1290 return lookupmflinknode
1249
1291
1250 while tmfnodes:
1292 while tmfnodes:
1251 tree, nodes = tmfnodes.popitem()
1293 tree, nodes = tmfnodes.popitem()
1252
1294
1253 should_visit = self._matcher.visitdir(tree[:-1])
1295 should_visit = self._matcher.visitdir(tree[:-1])
1254 if tree and not should_visit:
1296 if tree and not should_visit:
1255 continue
1297 continue
1256
1298
1257 store = mfl.getstorage(tree)
1299 store = mfl.getstorage(tree)
1258
1300
1259 if not should_visit:
1301 if not should_visit:
1260 # No nodes to send because this directory is out of
1302 # No nodes to send because this directory is out of
1261 # the client's view of the repository (probably
1303 # the client's view of the repository (probably
1262 # because of narrow clones). Do this even for the root
1304 # because of narrow clones). Do this even for the root
1263 # directory (tree=='')
1305 # directory (tree=='')
1264 prunednodes = []
1306 prunednodes = []
1265 else:
1307 else:
1266 # Avoid sending any manifest nodes we can prove the
1308 # Avoid sending any manifest nodes we can prove the
1267 # client already has by checking linkrevs. See the
1309 # client already has by checking linkrevs. See the
1268 # related comment in generatefiles().
1310 # related comment in generatefiles().
1269 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1311 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1270
1312
1271 if tree and not prunednodes:
1313 if tree and not prunednodes:
1272 continue
1314 continue
1273
1315
1274 lookupfn = makelookupmflinknode(tree, nodes)
1316 lookupfn = makelookupmflinknode(tree, nodes)
1275
1317
1276 deltas = deltagroup(
1318 deltas = deltagroup(
1277 self._repo,
1319 self._repo,
1278 store,
1320 store,
1279 prunednodes,
1321 prunednodes,
1280 False,
1322 False,
1281 lookupfn,
1323 lookupfn,
1282 self._forcedeltaparentprev,
1324 self._forcedeltaparentprev,
1283 ellipses=self._ellipses,
1325 ellipses=self._ellipses,
1284 topic=_(b'manifests'),
1326 topic=_(b'manifests'),
1285 clrevtolocalrev=clrevtolocalrev,
1327 clrevtolocalrev=clrevtolocalrev,
1286 fullclnodes=self._fullclnodes,
1328 fullclnodes=self._fullclnodes,
1287 precomputedellipsis=self._precomputedellipsis,
1329 precomputedellipsis=self._precomputedellipsis,
1330 sidedata_helpers=sidedata_helpers,
1288 )
1331 )
1289
1332
1290 if not self._oldmatcher.visitdir(store.tree[:-1]):
1333 if not self._oldmatcher.visitdir(store.tree[:-1]):
1291 yield tree, deltas
1334 yield tree, deltas
1292 else:
1335 else:
1293 # 'deltas' is a generator and we need to consume it even if
1336 # 'deltas' is a generator and we need to consume it even if
1294 # we are not going to send it because a side-effect is that
1337 # we are not going to send it because a side-effect is that
1295 # it updates tmfnodes (via lookupfn)
1338 # it updates tmfnodes (via lookupfn)
1296 for d in deltas:
1339 for d in deltas:
1297 pass
1340 pass
1298 if not tree:
1341 if not tree:
1299 yield tree, []
1342 yield tree, []
1300
1343
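The `while tmfnodes: ... popitem()` loop above is a worklist: consuming one tree can discover subtrees (manifest entries flagged `b't'`), which are pushed back into the same dict. A toy reduction, with a `children` map standing in for the entries found while reading each manifest:

    def walk_trees(tmfnodes, children):
        visited = []
        while tmfnodes:
            tree, nodes = tmfnodes.popitem()
            visited.append(tree)
            for subtree in children.get(tree, ()):
                tmfnodes.setdefault(subtree, {})  # enqueue newly discovered trees
        return visited

    out = walk_trees({'': {}}, {'': ['dir/'], 'dir/': ['dir/sub/']})
    assert sorted(out) == ['', 'dir/', 'dir/sub/']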
1301 def _prunemanifests(self, store, nodes, commonrevs):
1344 def _prunemanifests(self, store, nodes, commonrevs):
1302 if not self._ellipses:
1345 if not self._ellipses:
1303 # In non-ellipses case and large repositories, it is better to
1346 # In non-ellipses case and large repositories, it is better to
1304 # prevent calling of store.rev and store.linkrev on a lot of
1347 # prevent calling of store.rev and store.linkrev on a lot of
1305 # nodes as compared to sending some extra data
1348 # nodes as compared to sending some extra data
1306 return nodes.copy()
1349 return nodes.copy()
1307 # This is split out as a separate method to allow filtering
1350 # This is split out as a separate method to allow filtering
1308 # commonrevs in extension code.
1351 # commonrevs in extension code.
1309 #
1352 #
1310 # TODO(augie): this shouldn't be required, instead we should
1353 # TODO(augie): this shouldn't be required, instead we should
1311 # make filtering of revisions to send delegated to the store
1354 # make filtering of revisions to send delegated to the store
1312 # layer.
1355 # layer.
1313 frev, flr = store.rev, store.linkrev
1356 frev, flr = store.rev, store.linkrev
1314 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1357 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1315
1358
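`_prunemanifests` filters by linkrev membership; the same one-liner in miniature, with dict lookups standing in for `store.rev` and `store.linkrev`:

    rev = {'n0': 0, 'n1': 1}.__getitem__        # node -> local revnum
    linkrev = {0: 10, 1: 11}.__getitem__        # local revnum -> changelog revnum

    def prune_known(nodes, commonrevs):
        # Drop nodes whose introducing changeset the client already has.
        return [n for n in nodes if linkrev(rev(n)) not in commonrevs]

    assert prune_known(['n0', 'n1'], commonrevs={10}) == ['n1']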
1316 # The 'source' parameter is useful for extensions
1359 # The 'source' parameter is useful for extensions
1317 def generatefiles(
1360 def generatefiles(
1318 self,
1361 self,
1319 changedfiles,
1362 changedfiles,
1320 commonrevs,
1363 commonrevs,
1321 source,
1364 source,
1322 mfdicts,
1365 mfdicts,
1323 fastpathlinkrev,
1366 fastpathlinkrev,
1324 fnodes,
1367 fnodes,
1325 clrevs,
1368 clrevs,
1369 sidedata_helpers=None,
1326 ):
1370 ):
1327 changedfiles = [
1371 changedfiles = [
1328 f
1372 f
1329 for f in changedfiles
1373 for f in changedfiles
1330 if self._matcher(f) and not self._oldmatcher(f)
1374 if self._matcher(f) and not self._oldmatcher(f)
1331 ]
1375 ]
1332
1376
1333 if not fastpathlinkrev:
1377 if not fastpathlinkrev:
1334
1378
1335 def normallinknodes(unused, fname):
1379 def normallinknodes(unused, fname):
1336 return fnodes.get(fname, {})
1380 return fnodes.get(fname, {})
1337
1381
1338 else:
1382 else:
1339 cln = self._repo.changelog.node
1383 cln = self._repo.changelog.node
1340
1384
1341 def normallinknodes(store, fname):
1385 def normallinknodes(store, fname):
1342 flinkrev = store.linkrev
1386 flinkrev = store.linkrev
1343 fnode = store.node
1387 fnode = store.node
1344 revs = ((r, flinkrev(r)) for r in store)
1388 revs = ((r, flinkrev(r)) for r in store)
1345 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1389 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1346
1390
1347 clrevtolocalrev = {}
1391 clrevtolocalrev = {}
1348
1392
1349 if self._isshallow:
1393 if self._isshallow:
1350 # In a shallow clone, the linknodes callback needs to also include
1394 # In a shallow clone, the linknodes callback needs to also include
1351 # those file nodes that are in the manifests we sent but weren't
1395 # those file nodes that are in the manifests we sent but weren't
1352 # introduced by those manifests.
1396 # introduced by those manifests.
1353 commonctxs = [self._repo[c] for c in commonrevs]
1397 commonctxs = [self._repo[c] for c in commonrevs]
1354 clrev = self._repo.changelog.rev
1398 clrev = self._repo.changelog.rev
1355
1399
1356 def linknodes(flog, fname):
1400 def linknodes(flog, fname):
1357 for c in commonctxs:
1401 for c in commonctxs:
1358 try:
1402 try:
1359 fnode = c.filenode(fname)
1403 fnode = c.filenode(fname)
1360 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1404 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1361 except error.ManifestLookupError:
1405 except error.ManifestLookupError:
1362 pass
1406 pass
1363 links = normallinknodes(flog, fname)
1407 links = normallinknodes(flog, fname)
1364 if len(links) != len(mfdicts):
1408 if len(links) != len(mfdicts):
1365 for mf, lr in mfdicts:
1409 for mf, lr in mfdicts:
1366 fnode = mf.get(fname, None)
1410 fnode = mf.get(fname, None)
1367 if fnode in links:
1411 if fnode in links:
1368 links[fnode] = min(links[fnode], lr, key=clrev)
1412 links[fnode] = min(links[fnode], lr, key=clrev)
1369 elif fnode:
1413 elif fnode:
1370 links[fnode] = lr
1414 links[fnode] = lr
1371 return links
1415 return links
1372
1416
1373 else:
1417 else:
1374 linknodes = normallinknodes
1418 linknodes = normallinknodes
1375
1419
1376 repo = self._repo
1420 repo = self._repo
1377 progress = repo.ui.makeprogress(
1421 progress = repo.ui.makeprogress(
1378 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1422 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1379 )
1423 )
1380 for i, fname in enumerate(sorted(changedfiles)):
1424 for i, fname in enumerate(sorted(changedfiles)):
1381 filerevlog = repo.file(fname)
1425 filerevlog = repo.file(fname)
1382 if not filerevlog:
1426 if not filerevlog:
1383 raise error.Abort(
1427 raise error.Abort(
1384 _(b"empty or missing file data for %s") % fname
1428 _(b"empty or missing file data for %s") % fname
1385 )
1429 )
1386
1430
1387 clrevtolocalrev.clear()
1431 clrevtolocalrev.clear()
1388
1432
1389 linkrevnodes = linknodes(filerevlog, fname)
1433 linkrevnodes = linknodes(filerevlog, fname)
1390 # Lookup for filenodes, we collected the linkrev nodes above in the
1434 # Lookup for filenodes, we collected the linkrev nodes above in the
1391 # fastpath case and with lookupmf in the slowpath case.
1435 # fastpath case and with lookupmf in the slowpath case.
1392 def lookupfilelog(x):
1436 def lookupfilelog(x):
1393 return linkrevnodes[x]
1437 return linkrevnodes[x]
1394
1438
1395 frev, flr = filerevlog.rev, filerevlog.linkrev
1439 frev, flr = filerevlog.rev, filerevlog.linkrev
1396 # Skip sending any filenode we know the client already
1440 # Skip sending any filenode we know the client already
1397 # has. This avoids over-sending files relatively
1441 # has. This avoids over-sending files relatively
1398 # inexpensively, so it's not a problem if we under-filter
1442 # inexpensively, so it's not a problem if we under-filter
1399 # here.
1443 # here.
1400 filenodes = [
1444 filenodes = [
1401 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
1445 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
1402 ]
1446 ]
1403
1447
1404 if not filenodes:
1448 if not filenodes:
1405 continue
1449 continue
1406
1450
1407 progress.update(i + 1, item=fname)
1451 progress.update(i + 1, item=fname)
1408
1452
1409 deltas = deltagroup(
1453 deltas = deltagroup(
1410 self._repo,
1454 self._repo,
1411 filerevlog,
1455 filerevlog,
1412 filenodes,
1456 filenodes,
1413 False,
1457 False,
1414 lookupfilelog,
1458 lookupfilelog,
1415 self._forcedeltaparentprev,
1459 self._forcedeltaparentprev,
1416 ellipses=self._ellipses,
1460 ellipses=self._ellipses,
1417 clrevtolocalrev=clrevtolocalrev,
1461 clrevtolocalrev=clrevtolocalrev,
1418 fullclnodes=self._fullclnodes,
1462 fullclnodes=self._fullclnodes,
1419 precomputedellipsis=self._precomputedellipsis,
1463 precomputedellipsis=self._precomputedellipsis,
1464 sidedata_helpers=sidedata_helpers,
1420 )
1465 )
1421
1466
1422 yield fname, deltas
1467 yield fname, deltas
1423
1468
1424 progress.complete()
1469 progress.complete()
1425
1470
1426
1471
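In the shallow-clone branch of `generatefiles`, the interesting move is `min(links[fnode], lr, key=clrev)`: among candidate linkrev nodes for a file node, keep the changeset that comes earliest in the changelog. Reduced to its essentials (toy values):

    clrev = {'cA': 3, 'cB': 7}.__getitem__  # changeset node -> changelog revnum

    links = {'fnode1': 'cB'}
    links['fnode1'] = min(links['fnode1'], 'cA', key=clrev)
    assert links['fnode1'] == 'cA'  # the earlier-introduced changeset wins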
1427 def _makecg1packer(
1472 def _makecg1packer(
1428 repo,
1473 repo,
1429 oldmatcher,
1474 oldmatcher,
1430 matcher,
1475 matcher,
1431 bundlecaps,
1476 bundlecaps,
1432 ellipses=False,
1477 ellipses=False,
1433 shallow=False,
1478 shallow=False,
1434 ellipsisroots=None,
1479 ellipsisroots=None,
1435 fullnodes=None,
1480 fullnodes=None,
1436 remote_sidedata=None,
1481 remote_sidedata=None,
1437 ):
1482 ):
1438 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1483 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1439 d.node, d.p1node, d.p2node, d.linknode
1484 d.node, d.p1node, d.p2node, d.linknode
1440 )
1485 )
1441
1486
1442 return cgpacker(
1487 return cgpacker(
1443 repo,
1488 repo,
1444 oldmatcher,
1489 oldmatcher,
1445 matcher,
1490 matcher,
1446 b'01',
1491 b'01',
1447 builddeltaheader=builddeltaheader,
1492 builddeltaheader=builddeltaheader,
1448 manifestsend=b'',
1493 manifestsend=b'',
1449 forcedeltaparentprev=True,
1494 forcedeltaparentprev=True,
1450 bundlecaps=bundlecaps,
1495 bundlecaps=bundlecaps,
1451 ellipses=ellipses,
1496 ellipses=ellipses,
1452 shallow=shallow,
1497 shallow=shallow,
1453 ellipsisroots=ellipsisroots,
1498 ellipsisroots=ellipsisroots,
1454 fullnodes=fullnodes,
1499 fullnodes=fullnodes,
1455 )
1500 )
1456
1501
1457
1502
1458 def _makecg2packer(
1503 def _makecg2packer(
1459 repo,
1504 repo,
1460 oldmatcher,
1505 oldmatcher,
1461 matcher,
1506 matcher,
1462 bundlecaps,
1507 bundlecaps,
1463 ellipses=False,
1508 ellipses=False,
1464 shallow=False,
1509 shallow=False,
1465 ellipsisroots=None,
1510 ellipsisroots=None,
1466 fullnodes=None,
1511 fullnodes=None,
1467 remote_sidedata=None,
1512 remote_sidedata=None,
1468 ):
1513 ):
1469 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1514 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1470 d.node, d.p1node, d.p2node, d.basenode, d.linknode
1515 d.node, d.p1node, d.p2node, d.basenode, d.linknode
1471 )
1516 )
1472
1517
1473 return cgpacker(
1518 return cgpacker(
1474 repo,
1519 repo,
1475 oldmatcher,
1520 oldmatcher,
1476 matcher,
1521 matcher,
1477 b'02',
1522 b'02',
1478 builddeltaheader=builddeltaheader,
1523 builddeltaheader=builddeltaheader,
1479 manifestsend=b'',
1524 manifestsend=b'',
1480 bundlecaps=bundlecaps,
1525 bundlecaps=bundlecaps,
1481 ellipses=ellipses,
1526 ellipses=ellipses,
1482 shallow=shallow,
1527 shallow=shallow,
1483 ellipsisroots=ellipsisroots,
1528 ellipsisroots=ellipsisroots,
1484 fullnodes=fullnodes,
1529 fullnodes=fullnodes,
1485 )
1530 )
1486
1531
1487
1532
1488 def _makecg3packer(
1533 def _makecg3packer(
1489 repo,
1534 repo,
1490 oldmatcher,
1535 oldmatcher,
1491 matcher,
1536 matcher,
1492 bundlecaps,
1537 bundlecaps,
1493 ellipses=False,
1538 ellipses=False,
1494 shallow=False,
1539 shallow=False,
1495 ellipsisroots=None,
1540 ellipsisroots=None,
1496 fullnodes=None,
1541 fullnodes=None,
1497 remote_sidedata=None,
1542 remote_sidedata=None,
1498 ):
1543 ):
1499 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1544 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1500 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1545 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1501 )
1546 )
1502
1547
1503 return cgpacker(
1548 return cgpacker(
1504 repo,
1549 repo,
1505 oldmatcher,
1550 oldmatcher,
1506 matcher,
1551 matcher,
1507 b'03',
1552 b'03',
1508 builddeltaheader=builddeltaheader,
1553 builddeltaheader=builddeltaheader,
1509 manifestsend=closechunk(),
1554 manifestsend=closechunk(),
1510 bundlecaps=bundlecaps,
1555 bundlecaps=bundlecaps,
1511 ellipses=ellipses,
1556 ellipses=ellipses,
1512 shallow=shallow,
1557 shallow=shallow,
1513 ellipsisroots=ellipsisroots,
1558 ellipsisroots=ellipsisroots,
1514 fullnodes=fullnodes,
1559 fullnodes=fullnodes,
1515 )
1560 )
1516
1561
1517
1562
1518 def _makecg4packer(
1563 def _makecg4packer(
1519 repo,
1564 repo,
1520 oldmatcher,
1565 oldmatcher,
1521 matcher,
1566 matcher,
1522 bundlecaps,
1567 bundlecaps,
1523 ellipses=False,
1568 ellipses=False,
1524 shallow=False,
1569 shallow=False,
1525 ellipsisroots=None,
1570 ellipsisroots=None,
1526 fullnodes=None,
1571 fullnodes=None,
1527 remote_sidedata=None,
1572 remote_sidedata=None,
1528 ):
1573 ):
1529 # Same header func as cg3. Sidedata is in a separate chunk from the delta to
1574 # Same header func as cg3. Sidedata is in a separate chunk from the delta to
1530 # differentiate "raw delta" and sidedata.
1575 # differentiate "raw delta" and sidedata.
1531 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1576 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1532 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1577 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1533 )
1578 )
1534
1579
1535 return cgpacker(
1580 return cgpacker(
1536 repo,
1581 repo,
1537 oldmatcher,
1582 oldmatcher,
1538 matcher,
1583 matcher,
1539 b'04',
1584 b'04',
1540 builddeltaheader=builddeltaheader,
1585 builddeltaheader=builddeltaheader,
1541 manifestsend=closechunk(),
1586 manifestsend=closechunk(),
1542 bundlecaps=bundlecaps,
1587 bundlecaps=bundlecaps,
1543 ellipses=ellipses,
1588 ellipses=ellipses,
1544 shallow=shallow,
1589 shallow=shallow,
1545 ellipsisroots=ellipsisroots,
1590 ellipsisroots=ellipsisroots,
1546 fullnodes=fullnodes,
1591 fullnodes=fullnodes,
1547 remote_sidedata=remote_sidedata,
1592 remote_sidedata=remote_sidedata,
1548 )
1593 )
1549
1594
1550
1595
1551 _packermap = {
1596 _packermap = {
1552 b'01': (_makecg1packer, cg1unpacker),
1597 b'01': (_makecg1packer, cg1unpacker),
1553 # cg2 adds support for exchanging generaldelta
1598 # cg2 adds support for exchanging generaldelta
1554 b'02': (_makecg2packer, cg2unpacker),
1599 b'02': (_makecg2packer, cg2unpacker),
1555 # cg3 adds support for exchanging revlog flags and treemanifests
1600 # cg3 adds support for exchanging revlog flags and treemanifests
1556 b'03': (_makecg3packer, cg3unpacker),
1601 b'03': (_makecg3packer, cg3unpacker),
1557 # cg4 adds support for exchanging sidedata
1602 # cg4 adds support for exchanging sidedata
1558 b'04': (_makecg4packer, cg4unpacker),
1603 b'04': (_makecg4packer, cg4unpacker),
1559 }
1604 }
1560
1605
1561
1606
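`_packermap` is a plain version-dispatch table: each changegroup version string maps to a (packer factory, unpacker class) pair, and `getbundler`/`getunbundler` below simply index into it. A stripped-down sketch of the same shape (toy classes, not the real packers):

    def make_packer_01(*args, **kwargs):
        return 'cg1-packer'

    class Unpacker01(object):
        def __init__(self, fh, alg, extras=None):
            self.fh, self.alg = fh, alg

    packermap = {b'01': (make_packer_01, Unpacker01)}

    factory, unpacker_cls = packermap[b'01']
    assert factory() == 'cg1-packer'
    assert unpacker_cls(None, None).alg is None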
1562 def allsupportedversions(repo):
1607 def allsupportedversions(repo):
1563 versions = set(_packermap.keys())
1608 versions = set(_packermap.keys())
1564 needv03 = False
1609 needv03 = False
1565 if (
1610 if (
1566 repo.ui.configbool(b'experimental', b'changegroup3')
1611 repo.ui.configbool(b'experimental', b'changegroup3')
1567 or repo.ui.configbool(b'experimental', b'treemanifest')
1612 or repo.ui.configbool(b'experimental', b'treemanifest')
1568 or scmutil.istreemanifest(repo)
1613 or scmutil.istreemanifest(repo)
1569 ):
1614 ):
1570 # we keep version 03 because we need it to exchange treemanifest data
1615 # we keep version 03 because we need it to exchange treemanifest data
1571 #
1616 #
1572 # we also keep versions 01 and 02, because it is possible for a repo to
1617 # we also keep versions 01 and 02, because it is possible for a repo to
1573 # contain both normal and tree manifests at the same time, so using an
1618 # contain both normal and tree manifests at the same time, so using an
1574 # older version to pull data is viable
1619 # older version to pull data is viable
1575 #
1620 #
1576 # (or even to push subset of history)
1621 # (or even to push subset of history)
1577 needv03 = True
1622 needv03 = True
1578 has_revlogv2 = requirements.REVLOGV2_REQUIREMENT in repo.requirements
1623 has_revlogv2 = requirements.REVLOGV2_REQUIREMENT in repo.requirements
1579 if not has_revlogv2:
1624 if not has_revlogv2:
1580 versions.discard(b'04')
1625 versions.discard(b'04')
1581 if not needv03:
1626 if not needv03:
1582 versions.discard(b'03')
1627 versions.discard(b'03')
1583 return versions
1628 return versions
1584
1629
1585
1630
1586 # Changegroup versions that can be applied to the repo
1631 # Changegroup versions that can be applied to the repo
1587 def supportedincomingversions(repo):
1632 def supportedincomingversions(repo):
1588 return allsupportedversions(repo)
1633 return allsupportedversions(repo)
1589
1634
1590
1635
1591 # Changegroup versions that can be created from the repo
1636 # Changegroup versions that can be created from the repo
1592 def supportedoutgoingversions(repo):
1637 def supportedoutgoingversions(repo):
1593 versions = allsupportedversions(repo)
1638 versions = allsupportedversions(repo)
1594 if scmutil.istreemanifest(repo):
1639 if scmutil.istreemanifest(repo):
1595 # Versions 01 and 02 support only flat manifests and it's just too
1640 # Versions 01 and 02 support only flat manifests and it's just too
1596 # expensive to convert between the flat manifest and tree manifest on
1641 # expensive to convert between the flat manifest and tree manifest on
1597 # the fly. Since tree manifests are hashed differently, all of history
1642 # the fly. Since tree manifests are hashed differently, all of history
1598 # would have to be converted. Instead, we simply don't even pretend to
1643 # would have to be converted. Instead, we simply don't even pretend to
1599 # support versions 01 and 02.
1644 # support versions 01 and 02.
1600 versions.discard(b'01')
1645 versions.discard(b'01')
1601 versions.discard(b'02')
1646 versions.discard(b'02')
1602 if requirements.NARROW_REQUIREMENT in repo.requirements:
1647 if requirements.NARROW_REQUIREMENT in repo.requirements:
1603 # Versions 01 and 02 don't support revlog flags, and we need to
1648 # Versions 01 and 02 don't support revlog flags, and we need to
1604 # support that for stripping and unbundling to work.
1649 # support that for stripping and unbundling to work.
1605 versions.discard(b'01')
1650 versions.discard(b'01')
1606 versions.discard(b'02')
1651 versions.discard(b'02')
1607 if LFS_REQUIREMENT in repo.requirements:
1652 if LFS_REQUIREMENT in repo.requirements:
1608 # Versions 01 and 02 don't support revlog flags, and we need to
1653 # Versions 01 and 02 don't support revlog flags, and we need to
1609 # mark LFS entries with REVIDX_EXTSTORED.
1654 # mark LFS entries with REVIDX_EXTSTORED.
1610 versions.discard(b'01')
1655 versions.discard(b'01')
1611 versions.discard(b'02')
1656 versions.discard(b'02')
1612
1657
1613 return versions
1658 return versions
1614
1659
1615
1660
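`allsupportedversions` and `supportedoutgoingversions` share one shape: start from every known version and discard whatever the repo's requirements rule out. A compressed sketch (the real functions check three distinct requirements plus revlogv2, but the narrowing logic is the same):

    def outgoing_versions(treemanifest=False, narrow=False, lfs=False):
        versions = {b'01', b'02', b'03', b'04'}
        if treemanifest or narrow or lfs:
            # Versions 01/02 lack revlog flags / tree manifest support.
            versions -= {b'01', b'02'}
        return versions

    assert outgoing_versions(narrow=True) == {b'03', b'04'}
    assert outgoing_versions() == {b'01', b'02', b'03', b'04'}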
1616 def localversion(repo):
1661 def localversion(repo):
1617 # Finds the best version to use for bundles that are meant to be used
1662 # Finds the best version to use for bundles that are meant to be used
1618 # locally, such as those from strip and shelve, and temporary bundles.
1663 # locally, such as those from strip and shelve, and temporary bundles.
1619 return max(supportedoutgoingversions(repo))
1664 return max(supportedoutgoingversions(repo))
1620
1665
1621
1666
1622 def safeversion(repo):
1667 def safeversion(repo):
1623 # Finds the smallest version that it's safe to assume clients of the repo
1668 # Finds the smallest version that it's safe to assume clients of the repo
1624 # will support. For example, all hg versions that support generaldelta also
1669 # will support. For example, all hg versions that support generaldelta also
1625 # support changegroup 02.
1670 # support changegroup 02.
1626 versions = supportedoutgoingversions(repo)
1671 versions = supportedoutgoingversions(repo)
1627 if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
1672 if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
1628 versions.discard(b'01')
1673 versions.discard(b'01')
1629 assert versions
1674 assert versions
1630 return min(versions)
1675 return min(versions)
1631
1676
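The helpers above form a funnel: start from every changegroup version the code knows, discard whatever the repository's requirements rule out, then take min() for safety or max() for local bundles. A minimal standalone sketch of that pattern (plain sets standing in for the real requirement checks, names hypothetical):

    def safe_version_sketch(supported, reqs):
        # Prune candidates the requirements forbid, then pick the
        # smallest surviving version.
        versions = set(supported)
        if b'generaldelta' in reqs:
            versions.discard(b'01')
        assert versions
        return min(versions)

    assert safe_version_sketch({b'01', b'02', b'03'}, {b'generaldelta'}) == b'02'
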
1632
1677
1633 def getbundler(
1678 def getbundler(
1634 version,
1679 version,
1635 repo,
1680 repo,
1636 bundlecaps=None,
1681 bundlecaps=None,
1637 oldmatcher=None,
1682 oldmatcher=None,
1638 matcher=None,
1683 matcher=None,
1639 ellipses=False,
1684 ellipses=False,
1640 shallow=False,
1685 shallow=False,
1641 ellipsisroots=None,
1686 ellipsisroots=None,
1642 fullnodes=None,
1687 fullnodes=None,
1643 remote_sidedata=None,
1688 remote_sidedata=None,
1644 ):
1689 ):
1645 assert version in supportedoutgoingversions(repo)
1690 assert version in supportedoutgoingversions(repo)
1646
1691
1647 if matcher is None:
1692 if matcher is None:
1648 matcher = matchmod.always()
1693 matcher = matchmod.always()
1649 if oldmatcher is None:
1694 if oldmatcher is None:
1650 oldmatcher = matchmod.never()
1695 oldmatcher = matchmod.never()
1651
1696
1652 if version == b'01' and not matcher.always():
1697 if version == b'01' and not matcher.always():
1653 raise error.ProgrammingError(
1698 raise error.ProgrammingError(
1654 b'version 01 changegroups do not support sparse file matchers'
1699 b'version 01 changegroups do not support sparse file matchers'
1655 )
1700 )
1656
1701
1657 if ellipses and version in (b'01', b'02'):
1702 if ellipses and version in (b'01', b'02'):
1658 raise error.Abort(
1703 raise error.Abort(
1659 _(
1704 _(
1660 b'ellipsis nodes require at least cg3 on client and server, '
1705 b'ellipsis nodes require at least cg3 on client and server, '
1661 b'but negotiated version %s'
1706 b'but negotiated version %s'
1662 )
1707 )
1663 % version
1708 % version
1664 )
1709 )
1665
1710
1666 # Requested files could include files not in the local store. So
1711 # Requested files could include files not in the local store. So
1667 # filter those out.
1712 # filter those out.
1668 matcher = repo.narrowmatch(matcher)
1713 matcher = repo.narrowmatch(matcher)
1669
1714
1670 fn = _packermap[version][0]
1715 fn = _packermap[version][0]
1671 return fn(
1716 return fn(
1672 repo,
1717 repo,
1673 oldmatcher,
1718 oldmatcher,
1674 matcher,
1719 matcher,
1675 bundlecaps,
1720 bundlecaps,
1676 ellipses=ellipses,
1721 ellipses=ellipses,
1677 shallow=shallow,
1722 shallow=shallow,
1678 ellipsisroots=ellipsisroots,
1723 ellipsisroots=ellipsisroots,
1679 fullnodes=fullnodes,
1724 fullnodes=fullnodes,
1680 remote_sidedata=remote_sidedata,
1725 remote_sidedata=remote_sidedata,
1681 )
1726 )
1682
1727
1683
1728
1684 def getunbundler(version, fh, alg, extras=None):
1729 def getunbundler(version, fh, alg, extras=None):
1685 return _packermap[version][1](fh, alg, extras=extras)
1730 return _packermap[version][1](fh, alg, extras=extras)
1686
1731
1687
1732
1688 def _changegroupinfo(repo, nodes, source):
1733 def _changegroupinfo(repo, nodes, source):
1689 if repo.ui.verbose or source == b'bundle':
1734 if repo.ui.verbose or source == b'bundle':
1690 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1735 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1691 if repo.ui.debugflag:
1736 if repo.ui.debugflag:
1692 repo.ui.debug(b"list of changesets:\n")
1737 repo.ui.debug(b"list of changesets:\n")
1693 for node in nodes:
1738 for node in nodes:
1694 repo.ui.debug(b"%s\n" % hex(node))
1739 repo.ui.debug(b"%s\n" % hex(node))
1695
1740
1696
1741
1697 def makechangegroup(
1742 def makechangegroup(
1698 repo, outgoing, version, source, fastpath=False, bundlecaps=None
1743 repo, outgoing, version, source, fastpath=False, bundlecaps=None
1699 ):
1744 ):
1700 cgstream = makestream(
1745 cgstream = makestream(
1701 repo,
1746 repo,
1702 outgoing,
1747 outgoing,
1703 version,
1748 version,
1704 source,
1749 source,
1705 fastpath=fastpath,
1750 fastpath=fastpath,
1706 bundlecaps=bundlecaps,
1751 bundlecaps=bundlecaps,
1707 )
1752 )
1708 return getunbundler(
1753 return getunbundler(
1709 version,
1754 version,
1710 util.chunkbuffer(cgstream),
1755 util.chunkbuffer(cgstream),
1711 None,
1756 None,
1712 {b'clcount': len(outgoing.missing)},
1757 {b'clcount': len(outgoing.missing)},
1713 )
1758 )
1714
1759
1715
1760
1716 def makestream(
1761 def makestream(
1717 repo,
1762 repo,
1718 outgoing,
1763 outgoing,
1719 version,
1764 version,
1720 source,
1765 source,
1721 fastpath=False,
1766 fastpath=False,
1722 bundlecaps=None,
1767 bundlecaps=None,
1723 matcher=None,
1768 matcher=None,
1724 remote_sidedata=None,
1769 remote_sidedata=None,
1725 ):
1770 ):
1726 bundler = getbundler(
1771 bundler = getbundler(
1727 version,
1772 version,
1728 repo,
1773 repo,
1729 bundlecaps=bundlecaps,
1774 bundlecaps=bundlecaps,
1730 matcher=matcher,
1775 matcher=matcher,
1731 remote_sidedata=remote_sidedata,
1776 remote_sidedata=remote_sidedata,
1732 )
1777 )
1733
1778
1734 repo = repo.unfiltered()
1779 repo = repo.unfiltered()
1735 commonrevs = outgoing.common
1780 commonrevs = outgoing.common
1736 csets = outgoing.missing
1781 csets = outgoing.missing
1737 heads = outgoing.ancestorsof
1782 heads = outgoing.ancestorsof
1738 # We go through the fast path if we get told to, or if all (unfiltered)
1783 # We go through the fast path if we get told to, or if all (unfiltered)
1739 # heads have been requested (since we then know that all linkrevs will
1784 # heads have been requested (since we then know that all linkrevs will
1740 # be pulled by the client).
1785 # be pulled by the client).
1741 heads.sort()
1786 heads.sort()
1742 fastpathlinkrev = fastpath or (
1787 fastpathlinkrev = fastpath or (
1743 repo.filtername is None and heads == sorted(repo.heads())
1788 repo.filtername is None and heads == sorted(repo.heads())
1744 )
1789 )
1745
1790
1746 repo.hook(b'preoutgoing', throw=True, source=source)
1791 repo.hook(b'preoutgoing', throw=True, source=source)
1747 _changegroupinfo(repo, csets, source)
1792 _changegroupinfo(repo, csets, source)
1748 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1793 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1749
1794
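The fast-path test above is a pure decision over the requested heads; restated as a toy function with hypothetical head values:

    def use_linkrev_fastpath(fastpath, filtername, requested_heads, repo_heads):
        # The cheap linkrev path is only valid for an unfiltered repo where
        # every head was requested, so every linkrev reaches the client.
        return fastpath or (
            filtername is None and sorted(requested_heads) == sorted(repo_heads)
        )

    assert use_linkrev_fastpath(False, None, [b'h2', b'h1'], [b'h1', b'h2'])
    assert not use_linkrev_fastpath(False, b'visible', [b'h1'], [b'h1', b'h2'])
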
1750
1795
1751 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1796 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1752 revisions = 0
1797 revisions = 0
1753 files = 0
1798 files = 0
1754 progress = repo.ui.makeprogress(
1799 progress = repo.ui.makeprogress(
1755 _(b'files'), unit=_(b'files'), total=expectedfiles
1800 _(b'files'), unit=_(b'files'), total=expectedfiles
1756 )
1801 )
1757 for chunkdata in iter(source.filelogheader, {}):
1802 for chunkdata in iter(source.filelogheader, {}):
1758 files += 1
1803 files += 1
1759 f = chunkdata[b"filename"]
1804 f = chunkdata[b"filename"]
1760 repo.ui.debug(b"adding %s revisions\n" % f)
1805 repo.ui.debug(b"adding %s revisions\n" % f)
1761 progress.increment()
1806 progress.increment()
1762 fl = repo.file(f)
1807 fl = repo.file(f)
1763 o = len(fl)
1808 o = len(fl)
1764 try:
1809 try:
1765 deltas = source.deltaiter()
1810 deltas = source.deltaiter()
1766 if not fl.addgroup(deltas, revmap, trp):
1811 if not fl.addgroup(deltas, revmap, trp):
1767 raise error.Abort(_(b"received file revlog group is empty"))
1812 raise error.Abort(_(b"received file revlog group is empty"))
1768 except error.CensoredBaseError as e:
1813 except error.CensoredBaseError as e:
1769 raise error.Abort(_(b"received delta base is censored: %s") % e)
1814 raise error.Abort(_(b"received delta base is censored: %s") % e)
1770 revisions += len(fl) - o
1815 revisions += len(fl) - o
1771 if f in needfiles:
1816 if f in needfiles:
1772 needs = needfiles[f]
1817 needs = needfiles[f]
1773 for new in pycompat.xrange(o, len(fl)):
1818 for new in pycompat.xrange(o, len(fl)):
1774 n = fl.node(new)
1819 n = fl.node(new)
1775 if n in needs:
1820 if n in needs:
1776 needs.remove(n)
1821 needs.remove(n)
1777 else:
1822 else:
1778 raise error.Abort(_(b"received spurious file revlog entry"))
1823 raise error.Abort(_(b"received spurious file revlog entry"))
1779 if not needs:
1824 if not needs:
1780 del needfiles[f]
1825 del needfiles[f]
1781 progress.complete()
1826 progress.complete()
1782
1827
1783 for f, needs in pycompat.iteritems(needfiles):
1828 for f, needs in pycompat.iteritems(needfiles):
1784 fl = repo.file(f)
1829 fl = repo.file(f)
1785 for n in needs:
1830 for n in needs:
1786 try:
1831 try:
1787 fl.rev(n)
1832 fl.rev(n)
1788 except error.LookupError:
1833 except error.LookupError:
1789 raise error.Abort(
1834 raise error.Abort(
1790 _(b'missing file data for %s:%s - run hg verify')
1835 _(b'missing file data for %s:%s - run hg verify')
1791 % (f, hex(n))
1836 % (f, hex(n))
1792 )
1837 )
1793
1838
1794 return revisions, files
1839 return revisions, files
1840
1841
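The needfiles bookkeeping in _addchangegroupfiles() boils down to draining a filename -> set-of-nodes mapping as deltas arrive; anything left over means file data is missing. A self-contained sketch with made-up nodes:

    needfiles = {b'a.txt': {b'n1', b'n2'}}
    received = [(b'a.txt', b'n1'), (b'a.txt', b'n2')]
    for f, n in received:
        needs = needfiles.get(f)
        if needs is not None:
            needs.discard(n)  # an unexpected node would be "spurious"
            if not needs:
                del needfiles[f]
    assert not needfiles  # leftovers trigger the "run hg verify" abort
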
1842 def get_sidedata_helpers(repo, remote_sd_categories, pull=False):
1843 # Computers for computing sidedata on-the-fly
1844 sd_computers = collections.defaultdict(list)
1845 # Computers for categories to remove from sidedata
1846 sd_removers = collections.defaultdict(list)
1847
1848 to_generate = remote_sd_categories - repo._wanted_sidedata
1849 to_remove = repo._wanted_sidedata - remote_sd_categories
1850 if pull:
1851 to_generate, to_remove = to_remove, to_generate
1852
1853 for revlog_kind, computers in repo._sidedata_computers.items():
1854 for category, computer in computers.items():
1855 if category in to_generate:
1856 sd_computers[revlog_kind].append(computer)
1857 if category in to_remove:
1858 sd_removers[revlog_kind].append(computer)
1859
1860 sidedata_helpers = (repo, sd_computers, sd_removers)
1861 return sidedata_helpers
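
get_sidedata_helpers() is set arithmetic plus a table lookup; the directions of "generate" and "remove" simply swap on pull because the data then arrives in the remote's format. An illustration with hypothetical category names:

    local_wanted = {b'copies-sd'}
    remote_wanted = {b'copies-sd', b'filesize-sd'}

    to_generate = remote_wanted - local_wanted  # compute on the fly for the peer
    to_remove = local_wanted - remote_wanted    # strip before sending
    pull = True
    if pull:
        to_generate, to_remove = to_remove, to_generate

    assert to_generate == set()
    assert to_remove == {b'filesize-sd'}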
@@ -1,292 +1,294 b''
1 # filelog.py - file history class for mercurial
1 # filelog.py - file history class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 from .i18n import _
10 from .i18n import _
11 from .node import (
11 from .node import (
12 nullid,
12 nullid,
13 nullrev,
13 nullrev,
14 )
14 )
15 from . import (
15 from . import (
16 error,
16 error,
17 revlog,
17 revlog,
18 )
18 )
19 from .interfaces import (
19 from .interfaces import (
20 repository,
20 repository,
21 util as interfaceutil,
21 util as interfaceutil,
22 )
22 )
23 from .utils import storageutil
23 from .utils import storageutil
24
24
25
25
26 @interfaceutil.implementer(repository.ifilestorage)
26 @interfaceutil.implementer(repository.ifilestorage)
27 class filelog(object):
27 class filelog(object):
28 def __init__(self, opener, path):
28 def __init__(self, opener, path):
29 self._revlog = revlog.revlog(
29 self._revlog = revlog.revlog(
30 opener, b'/'.join((b'data', path + b'.i')), censorable=True
30 opener, b'/'.join((b'data', path + b'.i')), censorable=True
31 )
31 )
32 # Full name of the user visible file, relative to the repository root.
32 # Full name of the user visible file, relative to the repository root.
33 # Used by LFS.
33 # Used by LFS.
34 self._revlog.filename = path
34 self._revlog.filename = path
35 self._revlog.revlog_kind = b'filelog'
35 self._revlog.revlog_kind = b'filelog'
36
36
37 def __len__(self):
37 def __len__(self):
38 return len(self._revlog)
38 return len(self._revlog)
39
39
40 def __iter__(self):
40 def __iter__(self):
41 return self._revlog.__iter__()
41 return self._revlog.__iter__()
42
42
43 def hasnode(self, node):
43 def hasnode(self, node):
44 if node in (nullid, nullrev):
44 if node in (nullid, nullrev):
45 return False
45 return False
46
46
47 try:
47 try:
48 self._revlog.rev(node)
48 self._revlog.rev(node)
49 return True
49 return True
50 except (TypeError, ValueError, IndexError, error.LookupError):
50 except (TypeError, ValueError, IndexError, error.LookupError):
51 return False
51 return False
52
52
53 def revs(self, start=0, stop=None):
53 def revs(self, start=0, stop=None):
54 return self._revlog.revs(start=start, stop=stop)
54 return self._revlog.revs(start=start, stop=stop)
55
55
56 def parents(self, node):
56 def parents(self, node):
57 return self._revlog.parents(node)
57 return self._revlog.parents(node)
58
58
59 def parentrevs(self, rev):
59 def parentrevs(self, rev):
60 return self._revlog.parentrevs(rev)
60 return self._revlog.parentrevs(rev)
61
61
62 def rev(self, node):
62 def rev(self, node):
63 return self._revlog.rev(node)
63 return self._revlog.rev(node)
64
64
65 def node(self, rev):
65 def node(self, rev):
66 return self._revlog.node(rev)
66 return self._revlog.node(rev)
67
67
68 def lookup(self, node):
68 def lookup(self, node):
69 return storageutil.fileidlookup(
69 return storageutil.fileidlookup(
70 self._revlog, node, self._revlog.indexfile
70 self._revlog, node, self._revlog.indexfile
71 )
71 )
72
72
73 def linkrev(self, rev):
73 def linkrev(self, rev):
74 return self._revlog.linkrev(rev)
74 return self._revlog.linkrev(rev)
75
75
76 def commonancestorsheads(self, node1, node2):
76 def commonancestorsheads(self, node1, node2):
77 return self._revlog.commonancestorsheads(node1, node2)
77 return self._revlog.commonancestorsheads(node1, node2)
78
78
79 # Used by dagop.blockdescendants().
79 # Used by dagop.blockdescendants().
80 def descendants(self, revs):
80 def descendants(self, revs):
81 return self._revlog.descendants(revs)
81 return self._revlog.descendants(revs)
82
82
83 def heads(self, start=None, stop=None):
83 def heads(self, start=None, stop=None):
84 return self._revlog.heads(start, stop)
84 return self._revlog.heads(start, stop)
85
85
86 # Used by hgweb, children extension.
86 # Used by hgweb, children extension.
87 def children(self, node):
87 def children(self, node):
88 return self._revlog.children(node)
88 return self._revlog.children(node)
89
89
90 def iscensored(self, rev):
90 def iscensored(self, rev):
91 return self._revlog.iscensored(rev)
91 return self._revlog.iscensored(rev)
92
92
93 def revision(self, node, _df=None, raw=False):
93 def revision(self, node, _df=None, raw=False):
94 return self._revlog.revision(node, _df=_df, raw=raw)
94 return self._revlog.revision(node, _df=_df, raw=raw)
95
95
96 def rawdata(self, node, _df=None):
96 def rawdata(self, node, _df=None):
97 return self._revlog.rawdata(node, _df=_df)
97 return self._revlog.rawdata(node, _df=_df)
98
98
99 def emitrevisions(
99 def emitrevisions(
100 self,
100 self,
101 nodes,
101 nodes,
102 nodesorder=None,
102 nodesorder=None,
103 revisiondata=False,
103 revisiondata=False,
104 assumehaveparentrevisions=False,
104 assumehaveparentrevisions=False,
105 deltamode=repository.CG_DELTAMODE_STD,
105 deltamode=repository.CG_DELTAMODE_STD,
106 sidedata_helpers=None,
106 ):
107 ):
107 return self._revlog.emitrevisions(
108 return self._revlog.emitrevisions(
108 nodes,
109 nodes,
109 nodesorder=nodesorder,
110 nodesorder=nodesorder,
110 revisiondata=revisiondata,
111 revisiondata=revisiondata,
111 assumehaveparentrevisions=assumehaveparentrevisions,
112 assumehaveparentrevisions=assumehaveparentrevisions,
112 deltamode=deltamode,
113 deltamode=deltamode,
114 sidedata_helpers=sidedata_helpers,
113 )
115 )
114
116
115 def addrevision(
117 def addrevision(
116 self,
118 self,
117 revisiondata,
119 revisiondata,
118 transaction,
120 transaction,
119 linkrev,
121 linkrev,
120 p1,
122 p1,
121 p2,
123 p2,
122 node=None,
124 node=None,
123 flags=revlog.REVIDX_DEFAULT_FLAGS,
125 flags=revlog.REVIDX_DEFAULT_FLAGS,
124 cachedelta=None,
126 cachedelta=None,
125 ):
127 ):
126 return self._revlog.addrevision(
128 return self._revlog.addrevision(
127 revisiondata,
129 revisiondata,
128 transaction,
130 transaction,
129 linkrev,
131 linkrev,
130 p1,
132 p1,
131 p2,
133 p2,
132 node=node,
134 node=node,
133 flags=flags,
135 flags=flags,
134 cachedelta=cachedelta,
136 cachedelta=cachedelta,
135 )
137 )
136
138
137 def addgroup(
139 def addgroup(
138 self,
140 self,
139 deltas,
141 deltas,
140 linkmapper,
142 linkmapper,
141 transaction,
143 transaction,
142 addrevisioncb=None,
144 addrevisioncb=None,
143 duplicaterevisioncb=None,
145 duplicaterevisioncb=None,
144 maybemissingparents=False,
146 maybemissingparents=False,
145 ):
147 ):
146 if maybemissingparents:
148 if maybemissingparents:
147 raise error.Abort(
149 raise error.Abort(
148 _(
150 _(
149 b'revlog storage does not support missing '
151 b'revlog storage does not support missing '
150 b'parents write mode'
152 b'parents write mode'
151 )
153 )
152 )
154 )
153
155
154 return self._revlog.addgroup(
156 return self._revlog.addgroup(
155 deltas,
157 deltas,
156 linkmapper,
158 linkmapper,
157 transaction,
159 transaction,
158 addrevisioncb=addrevisioncb,
160 addrevisioncb=addrevisioncb,
159 duplicaterevisioncb=duplicaterevisioncb,
161 duplicaterevisioncb=duplicaterevisioncb,
160 )
162 )
161
163
162 def getstrippoint(self, minlink):
164 def getstrippoint(self, minlink):
163 return self._revlog.getstrippoint(minlink)
165 return self._revlog.getstrippoint(minlink)
164
166
165 def strip(self, minlink, transaction):
167 def strip(self, minlink, transaction):
166 return self._revlog.strip(minlink, transaction)
168 return self._revlog.strip(minlink, transaction)
167
169
168 def censorrevision(self, tr, node, tombstone=b''):
170 def censorrevision(self, tr, node, tombstone=b''):
169 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
171 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
170
172
171 def files(self):
173 def files(self):
172 return self._revlog.files()
174 return self._revlog.files()
173
175
174 def read(self, node):
176 def read(self, node):
175 return storageutil.filtermetadata(self.revision(node))
177 return storageutil.filtermetadata(self.revision(node))
176
178
177 def add(self, text, meta, transaction, link, p1=None, p2=None):
179 def add(self, text, meta, transaction, link, p1=None, p2=None):
178 if meta or text.startswith(b'\1\n'):
180 if meta or text.startswith(b'\1\n'):
179 text = storageutil.packmeta(meta, text)
181 text = storageutil.packmeta(meta, text)
180 rev = self.addrevision(text, transaction, link, p1, p2)
182 rev = self.addrevision(text, transaction, link, p1, p2)
181 return self.node(rev)
183 return self.node(rev)
182
184
183 def renamed(self, node):
185 def renamed(self, node):
184 return storageutil.filerevisioncopied(self, node)
186 return storageutil.filerevisioncopied(self, node)
185
187
186 def size(self, rev):
188 def size(self, rev):
187 """return the size of a given revision"""
189 """return the size of a given revision"""
188
190
189 # for revisions with renames, we have to go the slow way
191 # for revisions with renames, we have to go the slow way
190 node = self.node(rev)
192 node = self.node(rev)
191 if self.renamed(node):
193 if self.renamed(node):
192 return len(self.read(node))
194 return len(self.read(node))
193 if self.iscensored(rev):
195 if self.iscensored(rev):
194 return 0
196 return 0
195
197
196 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
198 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
197 return self._revlog.size(rev)
199 return self._revlog.size(rev)
198
200
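The slow path for renames (and the size+4 caveat) exists because of the copy-metadata framing: file text may be prefixed with a \1\n ... \1\n metadata block, so the stored length differs from the user-visible length. A rough sketch of the framing (storageutil.packmeta is the real implementation; this only approximates it):

    def pack_meta_sketch(meta, text):
        # Key/value metadata is framed by \x01\n markers ahead of the text.
        lines = b''.join(b'%s: %s\n' % (k, v) for k, v in sorted(meta.items()))
        return b'\x01\n' + lines + b'\x01\n' + text

    packed = pack_meta_sketch({b'copy': b'old.txt'}, b'hello')
    assert packed.endswith(b'hello') and len(packed) > len(b'hello')
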
199 def cmp(self, node, text):
201 def cmp(self, node, text):
200 """compare text with a given file revision
202 """compare text with a given file revision
201
203
202 returns True if text is different than what is stored.
204 returns True if text is different than what is stored.
203 """
205 """
204 return not storageutil.filedataequivalent(self, node, text)
206 return not storageutil.filedataequivalent(self, node, text)
205
207
206 def verifyintegrity(self, state):
208 def verifyintegrity(self, state):
207 return self._revlog.verifyintegrity(state)
209 return self._revlog.verifyintegrity(state)
208
210
209 def storageinfo(
211 def storageinfo(
210 self,
212 self,
211 exclusivefiles=False,
213 exclusivefiles=False,
212 sharedfiles=False,
214 sharedfiles=False,
213 revisionscount=False,
215 revisionscount=False,
214 trackedsize=False,
216 trackedsize=False,
215 storedsize=False,
217 storedsize=False,
216 ):
218 ):
217 return self._revlog.storageinfo(
219 return self._revlog.storageinfo(
218 exclusivefiles=exclusivefiles,
220 exclusivefiles=exclusivefiles,
219 sharedfiles=sharedfiles,
221 sharedfiles=sharedfiles,
220 revisionscount=revisionscount,
222 revisionscount=revisionscount,
221 trackedsize=trackedsize,
223 trackedsize=trackedsize,
222 storedsize=storedsize,
224 storedsize=storedsize,
223 )
225 )
224
226
225 # TODO these aren't part of the interface and aren't internal methods.
227 # TODO these aren't part of the interface and aren't internal methods.
226 # Callers should be fixed to not use them.
228 # Callers should be fixed to not use them.
227
229
228 # Used by bundlefilelog, unionfilelog.
230 # Used by bundlefilelog, unionfilelog.
229 @property
231 @property
230 def indexfile(self):
232 def indexfile(self):
231 return self._revlog.indexfile
233 return self._revlog.indexfile
232
234
233 @indexfile.setter
235 @indexfile.setter
234 def indexfile(self, value):
236 def indexfile(self, value):
235 self._revlog.indexfile = value
237 self._revlog.indexfile = value
236
238
237 # Used by repo upgrade.
239 # Used by repo upgrade.
238 def clone(self, tr, destrevlog, **kwargs):
240 def clone(self, tr, destrevlog, **kwargs):
239 if not isinstance(destrevlog, filelog):
241 if not isinstance(destrevlog, filelog):
240 raise error.ProgrammingError(b'expected filelog to clone()')
242 raise error.ProgrammingError(b'expected filelog to clone()')
241
243
242 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
244 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
243
245
244
246
245 class narrowfilelog(filelog):
247 class narrowfilelog(filelog):
246 """Filelog variation to be used with narrow stores."""
248 """Filelog variation to be used with narrow stores."""
247
249
248 def __init__(self, opener, path, narrowmatch):
250 def __init__(self, opener, path, narrowmatch):
249 super(narrowfilelog, self).__init__(opener, path)
251 super(narrowfilelog, self).__init__(opener, path)
250 self._narrowmatch = narrowmatch
252 self._narrowmatch = narrowmatch
251
253
252 def renamed(self, node):
254 def renamed(self, node):
253 res = super(narrowfilelog, self).renamed(node)
255 res = super(narrowfilelog, self).renamed(node)
254
256
255 # Renames that come from outside the narrowspec are problematic
257 # Renames that come from outside the narrowspec are problematic
256 # because we may lack the base text for the rename. This can result
258 # because we may lack the base text for the rename. This can result
257 # in code attempting to walk the ancestry or compute a diff
259 # in code attempting to walk the ancestry or compute a diff
258 # encountering a missing revision. We address this by silently
260 # encountering a missing revision. We address this by silently
259 # removing rename metadata if the source file is outside the
261 # removing rename metadata if the source file is outside the
260 # narrow spec.
262 # narrow spec.
261 #
263 #
262 # A better solution would be to see if the base revision is available,
264 # A better solution would be to see if the base revision is available,
263 # rather than assuming it isn't.
265 # rather than assuming it isn't.
264 #
266 #
265 # An even better solution would be to teach all consumers of rename
267 # An even better solution would be to teach all consumers of rename
266 # metadata that the base revision may not be available.
268 # metadata that the base revision may not be available.
267 #
269 #
268 # TODO consider better ways of doing this.
270 # TODO consider better ways of doing this.
269 if res and not self._narrowmatch(res[0]):
271 if res and not self._narrowmatch(res[0]):
270 return None
272 return None
271
273
272 return res
274 return res
273
275
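A toy restatement of the rename-hiding behavior described above, with a hypothetical narrowmatch predicate over paths:

    def narrowmatch(path):
        return path.startswith(b'included/')

    res = (b'excluded/old.txt', b'\x00' * 20)  # (copy source, source filenode)
    if res and not narrowmatch(res[0]):
        res = None  # hide the rename: its base text may not be available
    assert res is None
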
274 def size(self, rev):
276 def size(self, rev):
275 # Because we have a custom renamed() that may lie, we need to call
277 # Because we have a custom renamed() that may lie, we need to call
276 # the base renamed() to report accurate results.
278 # the base renamed() to report accurate results.
277 node = self.node(rev)
279 node = self.node(rev)
278 if super(narrowfilelog, self).renamed(node):
280 if super(narrowfilelog, self).renamed(node):
279 return len(self.read(node))
281 return len(self.read(node))
280 else:
282 else:
281 return super(narrowfilelog, self).size(rev)
283 return super(narrowfilelog, self).size(rev)
282
284
283 def cmp(self, node, text):
285 def cmp(self, node, text):
284 # We don't call `super` because narrow parents can be buggy in case of an
286 # We don't call `super` because narrow parents can be buggy in case of an
285 # ambiguous dirstate. Always take the slow path until there is a better
287 # ambiguous dirstate. Always take the slow path until there is a better
286 # fix, see issue6150.
288 # fix, see issue6150.
287
289
288 # Censored files compare against the empty file.
290 # Censored files compare against the empty file.
289 if self.iscensored(self.rev(node)):
291 if self.iscensored(self.rev(node)):
290 return text != b''
292 return text != b''
291
293
292 return self.read(node) != text
294 return self.read(node) != text
@@ -1,2355 +1,2357 b''
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import heapq
10 import heapq
11 import itertools
11 import itertools
12 import struct
12 import struct
13 import weakref
13 import weakref
14
14
15 from .i18n import _
15 from .i18n import _
16 from .node import (
16 from .node import (
17 bin,
17 bin,
18 hex,
18 hex,
19 nullid,
19 nullid,
20 nullrev,
20 nullrev,
21 )
21 )
22 from .pycompat import getattr
22 from .pycompat import getattr
23 from . import (
23 from . import (
24 encoding,
24 encoding,
25 error,
25 error,
26 match as matchmod,
26 match as matchmod,
27 mdiff,
27 mdiff,
28 pathutil,
28 pathutil,
29 policy,
29 policy,
30 pycompat,
30 pycompat,
31 revlog,
31 revlog,
32 util,
32 util,
33 )
33 )
34 from .interfaces import (
34 from .interfaces import (
35 repository,
35 repository,
36 util as interfaceutil,
36 util as interfaceutil,
37 )
37 )
38
38
39 parsers = policy.importmod('parsers')
39 parsers = policy.importmod('parsers')
40 propertycache = util.propertycache
40 propertycache = util.propertycache
41
41
42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44
44
45
45
46 def _parse(data):
46 def _parse(data):
47 # This method does a little bit of excessive-looking
47 # This method does a little bit of excessive-looking
48 # precondition checking. This is so that the behavior of this
48 # precondition checking. This is so that the behavior of this
49 # class exactly matches its C counterpart to try and help
49 # class exactly matches its C counterpart to try and help
50 # prevent surprise breakage for anyone that develops against
50 # prevent surprise breakage for anyone that develops against
51 # the pure version.
51 # the pure version.
52 if data and data[-1:] != b'\n':
52 if data and data[-1:] != b'\n':
53 raise ValueError(b'Manifest did not end in a newline.')
53 raise ValueError(b'Manifest did not end in a newline.')
54 prev = None
54 prev = None
55 for l in data.splitlines():
55 for l in data.splitlines():
56 if prev is not None and prev > l:
56 if prev is not None and prev > l:
57 raise ValueError(b'Manifest lines not in sorted order.')
57 raise ValueError(b'Manifest lines not in sorted order.')
58 prev = l
58 prev = l
59 f, n = l.split(b'\0')
59 f, n = l.split(b'\0')
60 nl = len(n)
60 nl = len(n)
61 flags = n[-1:]
61 flags = n[-1:]
62 if flags in _manifestflags:
62 if flags in _manifestflags:
63 n = n[:-1]
63 n = n[:-1]
64 nl -= 1
64 nl -= 1
65 else:
65 else:
66 flags = b''
66 flags = b''
67 if nl not in (40, 64):
67 if nl not in (40, 64):
68 raise ValueError(b'Invalid manifest line')
68 raise ValueError(b'Invalid manifest line')
69
69
70 yield f, bin(n), flags
70 yield f, bin(n), flags
71
71
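Each line handled by _parse() is '<filename>\0<hex node><optional flag>\n', with lines sorted by filename; decoding one hypothetical line by hand:

    from binascii import unhexlify as unhex

    line = b'foo/bar.txt\x00' + b'12' * 20 + b'x\n'  # 'x' marks executable
    f, n = line.rstrip(b'\n').split(b'\x00')
    flags = n[-1:] if n[-1:] in {b'l', b't', b'x'} else b''
    if flags:
        n = n[:-1]
    assert (f, unhex(n), flags) == (b'foo/bar.txt', b'\x12' * 20, b'x')
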
72
72
73 def _text(it):
73 def _text(it):
74 files = []
74 files = []
75 lines = []
75 lines = []
76 for f, n, fl in it:
76 for f, n, fl in it:
77 files.append(f)
77 files.append(f)
78 # if this is changed to support newlines in filenames,
78 # if this is changed to support newlines in filenames,
79 # be sure to check the templates/ dir again (especially *-raw.tmpl)
79 # be sure to check the templates/ dir again (especially *-raw.tmpl)
80 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
80 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
81
81
82 _checkforbidden(files)
82 _checkforbidden(files)
83 return b''.join(lines)
83 return b''.join(lines)
84
84
85
85
86 class lazymanifestiter(object):
86 class lazymanifestiter(object):
87 def __init__(self, lm):
87 def __init__(self, lm):
88 self.pos = 0
88 self.pos = 0
89 self.lm = lm
89 self.lm = lm
90
90
91 def __iter__(self):
91 def __iter__(self):
92 return self
92 return self
93
93
94 def next(self):
94 def next(self):
95 try:
95 try:
96 data, pos = self.lm._get(self.pos)
96 data, pos = self.lm._get(self.pos)
97 except IndexError:
97 except IndexError:
98 raise StopIteration
98 raise StopIteration
99 if pos == -1:
99 if pos == -1:
100 self.pos += 1
100 self.pos += 1
101 return data[0]
101 return data[0]
102 self.pos += 1
102 self.pos += 1
103 zeropos = data.find(b'\x00', pos)
103 zeropos = data.find(b'\x00', pos)
104 return data[pos:zeropos]
104 return data[pos:zeropos]
105
105
106 __next__ = next
106 __next__ = next
107
107
108
108
109 class lazymanifestiterentries(object):
109 class lazymanifestiterentries(object):
110 def __init__(self, lm):
110 def __init__(self, lm):
111 self.lm = lm
111 self.lm = lm
112 self.pos = 0
112 self.pos = 0
113
113
114 def __iter__(self):
114 def __iter__(self):
115 return self
115 return self
116
116
117 def next(self):
117 def next(self):
118 try:
118 try:
119 data, pos = self.lm._get(self.pos)
119 data, pos = self.lm._get(self.pos)
120 except IndexError:
120 except IndexError:
121 raise StopIteration
121 raise StopIteration
122 if pos == -1:
122 if pos == -1:
123 self.pos += 1
123 self.pos += 1
124 return data
124 return data
125 zeropos = data.find(b'\x00', pos)
125 zeropos = data.find(b'\x00', pos)
126 nlpos = data.find(b'\n', pos)
126 nlpos = data.find(b'\n', pos)
127 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
127 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
128 raise error.StorageError(b'Invalid manifest line')
128 raise error.StorageError(b'Invalid manifest line')
129 flags = data[nlpos - 1 : nlpos]
129 flags = data[nlpos - 1 : nlpos]
130 if flags in _manifestflags:
130 if flags in _manifestflags:
131 hlen = nlpos - zeropos - 2
131 hlen = nlpos - zeropos - 2
132 else:
132 else:
133 hlen = nlpos - zeropos - 1
133 hlen = nlpos - zeropos - 1
134 flags = b''
134 flags = b''
135 if hlen not in (40, 64):
135 if hlen not in (40, 64):
136 raise error.StorageError(b'Invalid manifest line')
136 raise error.StorageError(b'Invalid manifest line')
137 hashval = unhexlify(
137 hashval = unhexlify(
138 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
138 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
139 )
139 )
140 self.pos += 1
140 self.pos += 1
141 return (data[pos:zeropos], hashval, flags)
141 return (data[pos:zeropos], hashval, flags)
142
142
143 __next__ = next
143 __next__ = next
144
144
145
145
146 def unhexlify(data, extra, pos, length):
146 def unhexlify(data, extra, pos, length):
147 s = bin(data[pos : pos + length])
147 s = bin(data[pos : pos + length])
148 if extra:
148 if extra:
149 s += chr(extra & 0xFF)
149 s += chr(extra & 0xFF)
150 return s
150 return s
151
151
152
152
153 def _cmp(a, b):
153 def _cmp(a, b):
154 return (a > b) - (a < b)
154 return (a > b) - (a < b)
155
155
156
156
157 _manifestflags = {b'', b'l', b't', b'x'}
157 _manifestflags = {b'', b'l', b't', b'x'}
158
158
159
159
160 class _lazymanifest(object):
160 class _lazymanifest(object):
161 """A pure python manifest backed by a byte string. It is supplimented with
161 """A pure python manifest backed by a byte string. It is supplimented with
162 internal lists as it is modified, until it is compacted back to a pure byte
162 internal lists as it is modified, until it is compacted back to a pure byte
163 string.
163 string.
164
164
165 ``data`` is the initial manifest data.
165 ``data`` is the initial manifest data.
166
166
167 ``positions`` is a list of offsets, one per manifest entry. Positive
167 ``positions`` is a list of offsets, one per manifest entry. Positive
168 values are offsets into ``data``, negative values are offsets into the
168 values are offsets into ``data``, negative values are offsets into the
169 ``extradata`` list. When an entry is removed, its entry is dropped from
169 ``extradata`` list. When an entry is removed, its entry is dropped from
170 ``positions``. The values are encoded such that when walking the list and
170 ``positions``. The values are encoded such that when walking the list and
171 indexing into ``data`` or ``extradata`` as appropriate, the entries are
171 indexing into ``data`` or ``extradata`` as appropriate, the entries are
172 sorted by filename.
172 sorted by filename.
173
173
174 ``extradata`` is a list of (key, hash, flags) for entries that were added or
174 ``extradata`` is a list of (key, hash, flags) for entries that were added or
175 modified since the manifest was created or compacted.
175 modified since the manifest was created or compacted.
176 """
176 """
177
177
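A concrete illustration of the encoding described in the docstring: non-negative positions index into ``data``, negative positions reach into ``extradata`` via ``extradata[-pos - 1]`` (entries here are made up):

    data = b'a.txt\x00' + b'0' * 40 + b'\n'
    positions = [0]  # entry 0 lives in `data` at byte offset 0
    extradata = []

    # An added entry goes to extradata; its position is the negated
    # 1-based index, keeping 0 unambiguous as a `data` offset.
    extradata.append((b'b.txt', b'\x11' * 20, b''))
    positions.append(-len(extradata))

    pos = positions[1]
    assert pos < 0 and extradata[-pos - 1][0] == b'b.txt'
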
178 def __init__(
178 def __init__(
179 self,
179 self,
180 data,
180 data,
181 positions=None,
181 positions=None,
182 extrainfo=None,
182 extrainfo=None,
183 extradata=None,
183 extradata=None,
184 hasremovals=False,
184 hasremovals=False,
185 ):
185 ):
186 if positions is None:
186 if positions is None:
187 self.positions = self.findlines(data)
187 self.positions = self.findlines(data)
188 self.extrainfo = [0] * len(self.positions)
188 self.extrainfo = [0] * len(self.positions)
189 self.data = data
189 self.data = data
190 self.extradata = []
190 self.extradata = []
191 self.hasremovals = False
191 self.hasremovals = False
192 else:
192 else:
193 self.positions = positions[:]
193 self.positions = positions[:]
194 self.extrainfo = extrainfo[:]
194 self.extrainfo = extrainfo[:]
195 self.extradata = extradata[:]
195 self.extradata = extradata[:]
196 self.data = data
196 self.data = data
197 self.hasremovals = hasremovals
197 self.hasremovals = hasremovals
198
198
199 def findlines(self, data):
199 def findlines(self, data):
200 if not data:
200 if not data:
201 return []
201 return []
202 pos = data.find(b"\n")
202 pos = data.find(b"\n")
203 if pos == -1 or data[-1:] != b'\n':
203 if pos == -1 or data[-1:] != b'\n':
204 raise ValueError(b"Manifest did not end in a newline.")
204 raise ValueError(b"Manifest did not end in a newline.")
205 positions = [0]
205 positions = [0]
206 prev = data[: data.find(b'\x00')]
206 prev = data[: data.find(b'\x00')]
207 while pos < len(data) - 1 and pos != -1:
207 while pos < len(data) - 1 and pos != -1:
208 positions.append(pos + 1)
208 positions.append(pos + 1)
209 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
209 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
210 if nexts < prev:
210 if nexts < prev:
211 raise ValueError(b"Manifest lines not in sorted order.")
211 raise ValueError(b"Manifest lines not in sorted order.")
212 prev = nexts
212 prev = nexts
213 pos = data.find(b"\n", pos + 1)
213 pos = data.find(b"\n", pos + 1)
214 return positions
214 return positions
215
215
216 def _get(self, index):
216 def _get(self, index):
217 # get the position encoded in pos:
217 # get the position encoded in pos:
218 # positive number is an index in 'data'
218 # positive number is an index in 'data'
219 # negative number is in extrapieces
219 # negative number is in extrapieces
220 pos = self.positions[index]
220 pos = self.positions[index]
221 if pos >= 0:
221 if pos >= 0:
222 return self.data, pos
222 return self.data, pos
223 return self.extradata[-pos - 1], -1
223 return self.extradata[-pos - 1], -1
224
224
225 def _getkey(self, pos):
225 def _getkey(self, pos):
226 if pos >= 0:
226 if pos >= 0:
227 return self.data[pos : self.data.find(b'\x00', pos + 1)]
227 return self.data[pos : self.data.find(b'\x00', pos + 1)]
228 return self.extradata[-pos - 1][0]
228 return self.extradata[-pos - 1][0]
229
229
230 def bsearch(self, key):
230 def bsearch(self, key):
231 first = 0
231 first = 0
232 last = len(self.positions) - 1
232 last = len(self.positions) - 1
233
233
234 while first <= last:
234 while first <= last:
235 midpoint = (first + last) // 2
235 midpoint = (first + last) // 2
236 nextpos = self.positions[midpoint]
236 nextpos = self.positions[midpoint]
237 candidate = self._getkey(nextpos)
237 candidate = self._getkey(nextpos)
238 r = _cmp(key, candidate)
238 r = _cmp(key, candidate)
239 if r == 0:
239 if r == 0:
240 return midpoint
240 return midpoint
241 else:
241 else:
242 if r < 0:
242 if r < 0:
243 last = midpoint - 1
243 last = midpoint - 1
244 else:
244 else:
245 first = midpoint + 1
245 first = midpoint + 1
246 return -1
246 return -1
247
247
248 def bsearch2(self, key):
248 def bsearch2(self, key):
249 # same as the above, but will always return the position
249 # same as the above, but will always return the position
250 # done for performance reasons
250 # done for performance reasons
251 first = 0
251 first = 0
252 last = len(self.positions) - 1
252 last = len(self.positions) - 1
253
253
254 while first <= last:
254 while first <= last:
255 midpoint = (first + last) // 2
255 midpoint = (first + last) // 2
256 nextpos = self.positions[midpoint]
256 nextpos = self.positions[midpoint]
257 candidate = self._getkey(nextpos)
257 candidate = self._getkey(nextpos)
258 r = _cmp(key, candidate)
258 r = _cmp(key, candidate)
259 if r == 0:
259 if r == 0:
260 return (midpoint, True)
260 return (midpoint, True)
261 else:
261 else:
262 if r < 0:
262 if r < 0:
263 last = midpoint - 1
263 last = midpoint - 1
264 else:
264 else:
265 first = midpoint + 1
265 first = midpoint + 1
266 return (first, False)
266 return (first, False)
267
267
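bsearch2() returns an (index, found) pair whose index doubles as the insertion point on a miss. The standard bisect module expresses the same contract over a plain sorted key list (an equivalence sketch, not the real data layout):

    import bisect

    def bsearch2_sketch(sorted_keys, key):
        i = bisect.bisect_left(sorted_keys, key)
        return (i, i < len(sorted_keys) and sorted_keys[i] == key)

    assert bsearch2_sketch([b'a', b'c'], b'a') == (0, True)
    assert bsearch2_sketch([b'a', b'c'], b'b') == (1, False)  # insert before b'c'
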
268 def __contains__(self, key):
268 def __contains__(self, key):
269 return self.bsearch(key) != -1
269 return self.bsearch(key) != -1
270
270
271 def __getitem__(self, key):
271 def __getitem__(self, key):
272 if not isinstance(key, bytes):
272 if not isinstance(key, bytes):
273 raise TypeError(b"getitem: manifest keys must be a bytes.")
273 raise TypeError(b"getitem: manifest keys must be a bytes.")
274 needle = self.bsearch(key)
274 needle = self.bsearch(key)
275 if needle == -1:
275 if needle == -1:
276 raise KeyError
276 raise KeyError
277 data, pos = self._get(needle)
277 data, pos = self._get(needle)
278 if pos == -1:
278 if pos == -1:
279 return (data[1], data[2])
279 return (data[1], data[2])
280 zeropos = data.find(b'\x00', pos)
280 zeropos = data.find(b'\x00', pos)
281 nlpos = data.find(b'\n', zeropos)
281 nlpos = data.find(b'\n', zeropos)
282 assert 0 <= needle <= len(self.positions)
282 assert 0 <= needle <= len(self.positions)
283 assert len(self.extrainfo) == len(self.positions)
283 assert len(self.extrainfo) == len(self.positions)
284 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
284 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
285 raise error.StorageError(b'Invalid manifest line')
285 raise error.StorageError(b'Invalid manifest line')
286 hlen = nlpos - zeropos - 1
286 hlen = nlpos - zeropos - 1
287 flags = data[nlpos - 1 : nlpos]
287 flags = data[nlpos - 1 : nlpos]
288 if flags in _manifestflags:
288 if flags in _manifestflags:
289 hlen -= 1
289 hlen -= 1
290 else:
290 else:
291 flags = b''
291 flags = b''
292 if hlen not in (40, 64):
292 if hlen not in (40, 64):
293 raise error.StorageError(b'Invalid manifest line')
293 raise error.StorageError(b'Invalid manifest line')
294 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
294 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
295 return (hashval, flags)
295 return (hashval, flags)
296
296
297 def __delitem__(self, key):
297 def __delitem__(self, key):
298 needle, found = self.bsearch2(key)
298 needle, found = self.bsearch2(key)
299 if not found:
299 if not found:
300 raise KeyError
300 raise KeyError
301 cur = self.positions[needle]
301 cur = self.positions[needle]
302 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
302 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
303 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
303 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
304 if cur >= 0:
304 if cur >= 0:
305 # This does NOT unsort the list as far as the search functions are
305 # This does NOT unsort the list as far as the search functions are
306 # concerned, as they only examine lines mapped by self.positions.
306 # concerned, as they only examine lines mapped by self.positions.
307 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
307 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
308 self.hasremovals = True
308 self.hasremovals = True
309
309
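The removal trick above keeps binary search valid: the deleted line's first byte is overwritten with NUL as a tombstone and only ``positions`` forgets the entry. A sketch of the tombstoning:

    data = b'a\x00' + b'0' * 40 + b'\n' + b'b\x00' + b'1' * 40 + b'\n'
    cur = data.index(b'\nb') + 1  # offset of the second entry
    data = data[:cur] + b'\x00' + data[cur + 1:]
    assert b'\n\x00' in data  # _compact() can later spot the tombstone
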
310 def __setitem__(self, key, value):
310 def __setitem__(self, key, value):
311 if not isinstance(key, bytes):
311 if not isinstance(key, bytes):
312 raise TypeError(b"setitem: manifest keys must be a byte string.")
312 raise TypeError(b"setitem: manifest keys must be a byte string.")
313 if not isinstance(value, tuple) or len(value) != 2:
313 if not isinstance(value, tuple) or len(value) != 2:
314 raise TypeError(
314 raise TypeError(
315 b"Manifest values must be a tuple of (node, flags)."
315 b"Manifest values must be a tuple of (node, flags)."
316 )
316 )
317 hashval = value[0]
317 hashval = value[0]
318 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
318 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
319 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
319 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
320 flags = value[1]
320 flags = value[1]
321 if not isinstance(flags, bytes) or len(flags) > 1:
321 if not isinstance(flags, bytes) or len(flags) > 1:
322 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
322 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
323 needle, found = self.bsearch2(key)
323 needle, found = self.bsearch2(key)
324 if found:
324 if found:
325 # put the item
325 # put the item
326 pos = self.positions[needle]
326 pos = self.positions[needle]
327 if pos < 0:
327 if pos < 0:
328 self.extradata[-pos - 1] = (key, hashval, value[1])
328 self.extradata[-pos - 1] = (key, hashval, value[1])
329 else:
329 else:
330 # just don't bother
330 # just don't bother
331 self.extradata.append((key, hashval, value[1]))
331 self.extradata.append((key, hashval, value[1]))
332 self.positions[needle] = -len(self.extradata)
332 self.positions[needle] = -len(self.extradata)
333 else:
333 else:
334 # not found, put it in with extra positions
334 # not found, put it in with extra positions
335 self.extradata.append((key, hashval, value[1]))
335 self.extradata.append((key, hashval, value[1]))
336 self.positions = (
336 self.positions = (
337 self.positions[:needle]
337 self.positions[:needle]
338 + [-len(self.extradata)]
338 + [-len(self.extradata)]
339 + self.positions[needle:]
339 + self.positions[needle:]
340 )
340 )
341 self.extrainfo = (
341 self.extrainfo = (
342 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
342 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
343 )
343 )
344
344
345 def copy(self):
345 def copy(self):
346 # XXX call _compact like in C?
346 # XXX call _compact like in C?
347 return _lazymanifest(
347 return _lazymanifest(
348 self.data,
348 self.data,
349 self.positions,
349 self.positions,
350 self.extrainfo,
350 self.extrainfo,
351 self.extradata,
351 self.extradata,
352 self.hasremovals,
352 self.hasremovals,
353 )
353 )
354
354
355 def _compact(self):
355 def _compact(self):
356 # hopefully not called TOO often
356 # hopefully not called TOO often
357 if len(self.extradata) == 0 and not self.hasremovals:
357 if len(self.extradata) == 0 and not self.hasremovals:
358 return
358 return
359 l = []
359 l = []
360 i = 0
360 i = 0
361 offset = 0
361 offset = 0
362 self.extrainfo = [0] * len(self.positions)
362 self.extrainfo = [0] * len(self.positions)
363 while i < len(self.positions):
363 while i < len(self.positions):
364 if self.positions[i] >= 0:
364 if self.positions[i] >= 0:
365 cur = self.positions[i]
365 cur = self.positions[i]
366 last_cut = cur
366 last_cut = cur
367
367
368 # Collect all contiguous entries in the buffer at the current
368 # Collect all contiguous entries in the buffer at the current
369 # offset, breaking out only for added/modified items held in
369 # offset, breaking out only for added/modified items held in
370 # extradata, or a deleted line prior to the next position.
370 # extradata, or a deleted line prior to the next position.
371 while True:
371 while True:
372 self.positions[i] = offset
372 self.positions[i] = offset
373 i += 1
373 i += 1
374 if i == len(self.positions) or self.positions[i] < 0:
374 if i == len(self.positions) or self.positions[i] < 0:
375 break
375 break
376
376
377 # A removed file has no positions[] entry, but does have an
377 # A removed file has no positions[] entry, but does have an
378 # overwritten first byte. Break out and find the end of the
378 # overwritten first byte. Break out and find the end of the
379 # current good entry/entries if there is a removed file
379 # current good entry/entries if there is a removed file
380 # before the next position.
380 # before the next position.
381 if (
381 if (
382 self.hasremovals
382 self.hasremovals
383 and self.data.find(b'\n\x00', cur, self.positions[i])
383 and self.data.find(b'\n\x00', cur, self.positions[i])
384 != -1
384 != -1
385 ):
385 ):
386 break
386 break
387
387
388 offset += self.positions[i] - cur
388 offset += self.positions[i] - cur
389 cur = self.positions[i]
389 cur = self.positions[i]
390 end_cut = self.data.find(b'\n', cur)
390 end_cut = self.data.find(b'\n', cur)
391 if end_cut != -1:
391 if end_cut != -1:
392 end_cut += 1
392 end_cut += 1
393 offset += end_cut - cur
393 offset += end_cut - cur
394 l.append(self.data[last_cut:end_cut])
394 l.append(self.data[last_cut:end_cut])
395 else:
395 else:
396 while i < len(self.positions) and self.positions[i] < 0:
396 while i < len(self.positions) and self.positions[i] < 0:
397 cur = self.positions[i]
397 cur = self.positions[i]
398 t = self.extradata[-cur - 1]
398 t = self.extradata[-cur - 1]
399 l.append(self._pack(t))
399 l.append(self._pack(t))
400 self.positions[i] = offset
400 self.positions[i] = offset
401 # Hashes are either 20 bytes (old sha1s) or 32
401 # Hashes are either 20 bytes (old sha1s) or 32
402 # bytes (new non-sha1).
402 # bytes (new non-sha1).
403 hlen = 20
403 hlen = 20
404 if len(t[1]) > 25:
404 if len(t[1]) > 25:
405 hlen = 32
405 hlen = 32
406 if len(t[1]) > hlen:
406 if len(t[1]) > hlen:
407 self.extrainfo[i] = ord(t[1][hlen + 1])
407 self.extrainfo[i] = ord(t[1][hlen + 1])
408 offset += len(l[-1])
408 offset += len(l[-1])
409 i += 1
409 i += 1
410 self.data = b''.join(l)
410 self.data = b''.join(l)
411 self.hasremovals = False
411 self.hasremovals = False
412 self.extradata = []
412 self.extradata = []
413
413
414 def _pack(self, d):
414 def _pack(self, d):
415 n = d[1]
415 n = d[1]
416 assert len(n) in (20, 32)
416 assert len(n) in (20, 32)
417 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
417 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
418
418
419 def text(self):
419 def text(self):
420 self._compact()
420 self._compact()
421 return self.data
421 return self.data
422
422
423 def diff(self, m2, clean=False):
423 def diff(self, m2, clean=False):
424 '''Finds changes between the current manifest and m2.'''
424 '''Finds changes between the current manifest and m2.'''
425 # XXX think whether efficiency matters here
425 # XXX think whether efficiency matters here
426 diff = {}
426 diff = {}
427
427
428 for fn, e1, flags in self.iterentries():
428 for fn, e1, flags in self.iterentries():
429 if fn not in m2:
429 if fn not in m2:
430 diff[fn] = (e1, flags), (None, b'')
430 diff[fn] = (e1, flags), (None, b'')
431 else:
431 else:
432 e2 = m2[fn]
432 e2 = m2[fn]
433 if (e1, flags) != e2:
433 if (e1, flags) != e2:
434 diff[fn] = (e1, flags), e2
434 diff[fn] = (e1, flags), e2
435 elif clean:
435 elif clean:
436 diff[fn] = None
436 diff[fn] = None
437
437
438 for fn, e2, flags in m2.iterentries():
438 for fn, e2, flags in m2.iterentries():
439 if fn not in self:
439 if fn not in self:
440 diff[fn] = (None, b''), (e2, flags)
440 diff[fn] = (None, b''), (e2, flags)
441
441
442 return diff
442 return diff
443
443
444 def iterentries(self):
444 def iterentries(self):
445 return lazymanifestiterentries(self)
445 return lazymanifestiterentries(self)
446
446
447 def iterkeys(self):
447 def iterkeys(self):
448 return lazymanifestiter(self)
448 return lazymanifestiter(self)
449
449
450 def __iter__(self):
450 def __iter__(self):
451 return lazymanifestiter(self)
451 return lazymanifestiter(self)
452
452
453 def __len__(self):
453 def __len__(self):
454 return len(self.positions)
454 return len(self.positions)
455
455
456 def filtercopy(self, filterfn):
456 def filtercopy(self, filterfn):
457 # XXX should be optimized
457 # XXX should be optimized
458 c = _lazymanifest(b'')
458 c = _lazymanifest(b'')
459 for f, n, fl in self.iterentries():
459 for f, n, fl in self.iterentries():
460 if filterfn(f):
460 if filterfn(f):
461 c[f] = n, fl
461 c[f] = n, fl
462 return c
462 return c
463
463
464
464
465 try:
465 try:
466 _lazymanifest = parsers.lazymanifest
466 _lazymanifest = parsers.lazymanifest
467 except AttributeError:
467 except AttributeError:
468 pass
468 pass
469
469
470
470
471 @interfaceutil.implementer(repository.imanifestdict)
471 @interfaceutil.implementer(repository.imanifestdict)
472 class manifestdict(object):
472 class manifestdict(object):
473 def __init__(self, data=b''):
473 def __init__(self, data=b''):
474 self._lm = _lazymanifest(data)
474 self._lm = _lazymanifest(data)
475
475
476 def __getitem__(self, key):
476 def __getitem__(self, key):
477 return self._lm[key][0]
477 return self._lm[key][0]
478
478
479 def find(self, key):
479 def find(self, key):
480 return self._lm[key]
480 return self._lm[key]
481
481
482 def __len__(self):
482 def __len__(self):
483 return len(self._lm)
483 return len(self._lm)
484
484
485 def __nonzero__(self):
485 def __nonzero__(self):
486 # nonzero is covered by the __len__ function, but implementing it here
486 # nonzero is covered by the __len__ function, but implementing it here
487 # makes it easier for extensions to override.
487 # makes it easier for extensions to override.
488 return len(self._lm) != 0
488 return len(self._lm) != 0
489
489
490 __bool__ = __nonzero__
490 __bool__ = __nonzero__
491
491
492 def __setitem__(self, key, node):
492 def __setitem__(self, key, node):
493 self._lm[key] = node, self.flags(key)
493 self._lm[key] = node, self.flags(key)
494
494
495 def __contains__(self, key):
495 def __contains__(self, key):
496 if key is None:
496 if key is None:
497 return False
497 return False
498 return key in self._lm
498 return key in self._lm
499
499
500 def __delitem__(self, key):
500 def __delitem__(self, key):
501 del self._lm[key]
501 del self._lm[key]
502
502
503 def __iter__(self):
503 def __iter__(self):
504 return self._lm.__iter__()
504 return self._lm.__iter__()
505
505
506 def iterkeys(self):
506 def iterkeys(self):
507 return self._lm.iterkeys()
507 return self._lm.iterkeys()
508
508
509 def keys(self):
509 def keys(self):
510 return list(self.iterkeys())
510 return list(self.iterkeys())
511
511
512 def filesnotin(self, m2, match=None):
512 def filesnotin(self, m2, match=None):
513 '''Set of files in this manifest that are not in the other'''
513 '''Set of files in this manifest that are not in the other'''
514 if match is not None:
514 if match is not None:
515 match = matchmod.badmatch(match, lambda path, msg: None)
515 match = matchmod.badmatch(match, lambda path, msg: None)
516 sm2 = set(m2.walk(match))
516 sm2 = set(m2.walk(match))
517 return {f for f in self.walk(match) if f not in sm2}
517 return {f for f in self.walk(match) if f not in sm2}
518 return {f for f in self if f not in m2}
518 return {f for f in self if f not in m2}
519
519
520 @propertycache
520 @propertycache
521 def _dirs(self):
521 def _dirs(self):
522 return pathutil.dirs(self)
522 return pathutil.dirs(self)
523
523
524 def dirs(self):
524 def dirs(self):
525 return self._dirs
525 return self._dirs
526
526
527 def hasdir(self, dir):
527 def hasdir(self, dir):
528 return dir in self._dirs
528 return dir in self._dirs
529
529
530 def _filesfastpath(self, match):
530 def _filesfastpath(self, match):
531 """Checks whether we can correctly and quickly iterate over matcher
531 """Checks whether we can correctly and quickly iterate over matcher
532 files instead of over manifest files."""
532 files instead of over manifest files."""
533 files = match.files()
533 files = match.files()
534 return len(files) < 100 and (
534 return len(files) < 100 and (
535 match.isexact()
535 match.isexact()
536 or (match.prefix() and all(fn in self for fn in files))
536 or (match.prefix() and all(fn in self for fn in files))
537 )
537 )
538
538
    def walk(self, match):
        """Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                if fn in self:
                    yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if self._filesfastpath(match):
            m = manifestdict()
            lm = self._lm
            for fn in match.files():
                if fn in lm:
                    m._lm[fn] = lm[fn]
            return m

        m = manifestdict()
        m._lm = self._lm.filtercopy(match)
        return m

    def diff(self, m2, match=None, clean=False):
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match:
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        return self._lm.diff(m2._lm, clean)

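    # Sketch of a diff() result with hypothetical nodeids n1/n2
    # (illustrative, not from the original source):
    #   {
    #       b'modified.c': ((n1, b''), (n2, b'')),
    #       b'removed.c': ((n1, b''), (None, b'')),
    #       b'added.c': ((None, b''), (n2, b'x')),
    #   }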
    def setflag(self, key, flag):
        if flag not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._lm[key] = self[key], flag

    def get(self, key, default=None):
        try:
            return self._lm[key][0]
        except KeyError:
            return default

    def flags(self, key):
        try:
            return self._lm[key][1]
        except KeyError:
            return b''

    def copy(self):
        c = manifestdict()
        c._lm = self._lm.copy()
        return c

    def items(self):
        return (x[:2] for x in self._lm.iterentries())

    def iteritems(self):
        return (x[:2] for x in self._lm.iterentries())

    def iterentries(self):
        return self._lm.iterentries()

    def text(self):
        # most likely uses native version
        return self._lm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as a bytearray and a list of changes
        relative to that text, compute a delta that can be used by revlog.
        """
        delta = []
        dstart = None
        dend = None
        dline = [b""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # bs will either be the index of the item or the insert point
                start, end = _msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._lm[f]
                    l = b"%s\0%s%s\n" % (f, hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        raise AssertionError(
                            _(b"failed to remove %s from manifest") % f
                        )
                    l = b""
                if dstart is not None and dstart <= start and dend >= start:
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    if dstart is not None:
                        delta.append([dstart, dend, b"".join(dline)])
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append([dstart, dend, b"".join(dline)])
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = _addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = bytearray(self.text())
            deltatext = mdiff.textdiff(
                util.buffer(base), util.buffer(arraytext)
            )

        return arraytext, deltatext


def _msearch(m, s, lo=0, hi=None):
    """return a tuple (start, end) that says where to find s within m.

    If the string is found, m[start:end] is the line containing
    that string. If start == end the string was not found and
    they indicate the proper sorted insertion point.

    m should be a buffer, a memoryview or a byte string.
    s is a byte string"""

    def advance(i, c):
        while i < lenm and m[i : i + 1] != c:
            i += 1
        return i

    if not s:
        return (lo, lo)
    lenm = len(m)
    if not hi:
        hi = lenm
    while lo < hi:
        mid = (lo + hi) // 2
        start = mid
        while start > 0 and m[start - 1 : start] != b'\n':
            start -= 1
        end = advance(start, b'\0')
        if bytes(m[start:end]) < s:
            # we know that after the null there are 40 bytes of sha1
            # this translates to the bisect lo = mid + 1
            lo = advance(end + 40, b'\n') + 1
        else:
            # this translates to the bisect hi = mid
            hi = start
    end = advance(lo, b'\0')
    found = m[lo:end]
    if s == found:
        # we know that after the null there are 40 bytes of sha1
        end = advance(end + 40, b'\n')
        return (lo, end + 1)
    else:
        return (lo, lo)


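# Worked example for _msearch() above, assuming 40-byte hex sha1 nodeids
# and no flag suffix: each manifest line is b'path\x00<40 hex>\n', 43
# bytes for a one-byte path, so for
#   m = b'a\x00' + b'0' * 40 + b'\n' + b'b\x00' + b'1' * 40 + b'\n'
# _msearch(m, b'b') returns (43, 86), the slice of the b'b' line
# including its trailing newline.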
def _checkforbidden(l):
    """Check filenames for illegal characters."""
    for f in l:
        if b'\n' in f or b'\r' in f:
            raise error.StorageError(
                _(b"'\\n' and '\\r' disallowed in filenames: %r")
                % pycompat.bytestr(f)
            )


# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(addlist, x):
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    currentposition = 0
    newaddlist = bytearray()

    for start, end, content in x:
        newaddlist += addlist[currentposition:start]
        if content:
            newaddlist += bytearray(content)

        currentposition = end

    newaddlist += addlist[currentposition:]

    deltatext = b"".join(
        struct.pack(b">lll", start, end, len(content)) + content
        for start, end, content in x
    )
    return deltatext, newaddlist


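# The deltatext built above is a sequence of patch chunks, each a
# 12-byte big-endian (start, end, length) header followed by the
# replacement bytes; e.g. struct.pack(b'>lll', 0, 43, 43) + line would
# rewrite the first 43 bytes of the base text (hypothetical numbers for
# illustration).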
def _splittopdir(f):
    if b'/' in f:
        dir, subpath = f.split(b'/', 1)
        return dir + b'/', subpath
    else:
        return b'', f


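# For reference, _splittopdir() peels off the first path component:
# _splittopdir(b'a/b/c') == (b'a/', b'b/c') and _splittopdir(b'f') ==
# (b'', b'f'); treemanifest uses this to route lookups into subtrees.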
_noop = lambda s: None


@interfaceutil.implementer(repository.imanifestdict)
class treemanifest(object):
    def __init__(self, dir=b'', text=b''):
        self._dir = dir
        self._node = nullid
        self._loadfunc = _noop
        self._copyfunc = _noop
        self._dirty = False
        self._dirs = {}
        self._lazydirs = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        if text:

            def readsubtree(subdir, subm):
                raise AssertionError(
                    b'treemanifest constructor only accepts flat manifests'
                )

            self.parse(text, readsubtree)
            self._dirty = True  # Mark flat manifest dirty after parsing

    def _subpath(self, path):
        return self._dir + path

    def _loadalllazy(self):
        selfdirs = self._dirs
        subpath = self._subpath
        for d, (node, readsubtree, docopy) in pycompat.iteritems(
            self._lazydirs
        ):
            if docopy:
                selfdirs[d] = readsubtree(subpath(d), node).copy()
            else:
                selfdirs[d] = readsubtree(subpath(d), node)
        self._lazydirs = {}

    def _loadlazy(self, d):
        v = self._lazydirs.get(d)
        if v:
            node, readsubtree, docopy = v
            if docopy:
                self._dirs[d] = readsubtree(self._subpath(d), node).copy()
            else:
                self._dirs[d] = readsubtree(self._subpath(d), node)
            del self._lazydirs[d]

    def _loadchildrensetlazy(self, visit):
        if not visit:
            return None
        if visit == b'all' or visit == b'this':
            self._loadalllazy()
            return None

        loadlazy = self._loadlazy
        for k in visit:
            loadlazy(k + b'/')
        return visit

    def _loaddifflazy(self, t1, t2):
        """load items in t1 and t2 if they're needed for diffing.

        The current criteria are:
        - if it's not present in _lazydirs in either t1 or t2, load it in the
          other (it may already be loaded or it may not exist, doesn't matter)
        - if it's present in _lazydirs in both, compare the nodeid; if it
          differs, load it in both
        """
        toloadlazy = []
        for d, v1 in pycompat.iteritems(t1._lazydirs):
            v2 = t2._lazydirs.get(d)
            if not v2 or v2[0] != v1[0]:
                toloadlazy.append(d)
        for d, v1 in pycompat.iteritems(t2._lazydirs):
            if d not in t1._lazydirs:
                toloadlazy.append(d)

        for d in toloadlazy:
            t1._loadlazy(d)
            t2._loadlazy(d)

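    # For orientation: a _lazydirs entry maps b'dir/' to a
    # (node, readsubtree, docopy) tuple (see _loadalllazy/_loadlazy), so
    # _loaddifflazy only materializes subtrees whose nodeids actually
    # differ between the two manifests being compared.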
    def __len__(self):
        self._load()
        size = len(self._files)
        self._loadalllazy()
        for m in self._dirs.values():
            size += m.__len__()
        return size

    def __nonzero__(self):
        # Faster than "__len__() != 0" since it avoids loading sub-manifests
        return not self._isempty()

    __bool__ = __nonzero__

    def _isempty(self):
        self._load()  # for consistency; already loaded by all callers
        # See if we can skip loading everything.
        if self._files or (
            self._dirs and any(not m._isempty() for m in self._dirs.values())
        ):
            return False
        self._loadalllazy()
        return not self._dirs or all(m._isempty() for m in self._dirs.values())

    @encoding.strmethod
    def __repr__(self):
        return (
            b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
            % (
                self._dir,
                hex(self._node),
                bool(self._loadfunc is _noop),
                self._dirty,
                id(self),
            )
        )

    def dir(self):
        """The directory that this tree manifest represents, including a
        trailing '/'. Empty string for the repo root directory."""
        return self._dir

    def node(self):
        """The node of this instance. nullid for unsaved instances. Should
        be updated when the instance is read or written from a revlog.
        """
        assert not self._dirty
        return self._node

    def setnode(self, node):
        self._node = node
        self._dirty = False

    def iterentries(self):
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n, self._flags.get(p, b'')
            else:
                for x in n.iterentries():
                    yield x

    def items(self):
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n
            else:
                for f, sn in pycompat.iteritems(n):
                    yield f, sn

    iteritems = items

    def iterkeys(self):
        self._load()
        self._loadalllazy()
        for p in sorted(itertools.chain(self._dirs, self._files)):
            if p in self._files:
                yield self._subpath(p)
            else:
                for f in self._dirs[p]:
                    yield f

    def keys(self):
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __contains__(self, f):
        if f is None:
            return False
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return False

            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f, default=None):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]

    def flags(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return b''
            return self._dirs[dir].flags(subpath)
        else:
            if f in self._lazydirs or f in self._dirs:
                return b''
            return self._flags.get(f, b'')

    def find(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, b'')

    def __delitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]
        self._dirty = True

    def __setitem__(self, f, n):
        assert n is not None
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(self._subpath(dir))
            self._dirs[dir].__setitem__(subpath, n)
        else:
            # manifest nodes are either 20 bytes or 32 bytes,
            # depending on the hash in use. Assert this as historically
            # sometimes extra bytes were added.
            assert len(n) in (20, 32)
            self._files[f] = n
        self._dirty = True

    def _load(self):
        if self._loadfunc is not _noop:
            lf, self._loadfunc = self._loadfunc, _noop
            lf(self)
        elif self._copyfunc is not _noop:
            cf, self._copyfunc = self._copyfunc, _noop
            cf(self)

    def setflag(self, f, flags):
        """Set the flags (symlink, executable) for path f."""
        if flags not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(self._subpath(dir))
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags
        self._dirty = True

    def copy(self):
        copy = treemanifest(self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:

            def _copyfunc(s):
                self._load()
                s._lazydirs = {
                    d: (n, r, True)
                    for d, (n, r, c) in pycompat.iteritems(self._lazydirs)
                }
                sdirs = s._dirs
                for d, v in pycompat.iteritems(self._dirs):
                    sdirs[d] = v.copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)

            if self._loadfunc is _noop:
                _copyfunc(copy)
            else:
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy

    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.filesnotin(m2)

        files = set()

        def _filesnotin(t1, t2):
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)
            for d, m1 in pycompat.iteritems(t1._dirs):
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    files.update(m1.iterkeys())

            for fn in t1._files:
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self):
        return pathutil.dirs(self)

    def dirs(self):
        return self._alldirs

    def hasdir(self, dir):
        self._load()
        topdir, subdir = _splittopdir(dir)
        if topdir:
            self._loadlazy(topdir)
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
        dirslash = dir + b'/'
        return dirslash in self._dirs or dirslash in self._lazydirs

    def walk(self, match):
        """Generates matching file names.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        for fn in self._walk(match):
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _walk(self, match):
        '''Recursively generates matching file names for walk().'''
        visit = match.visitchildrenset(self._dir[:-1])
        if not visit:
            return

        # yield this dir's files and walk its submanifests
        self._load()
        visit = self._loadchildrensetlazy(visit)
        for p in sorted(list(self._dirs) + list(self._files)):
            if p in self._files:
                fullp = self._subpath(p)
                if match(fullp):
                    yield fullp
            else:
                if not visit or p[:-1] in visit:
                    for f in self._dirs[p]._walk(match):
                        yield f

    def _matches(self, match):
        """recursively generate a new manifest filtered by the match argument."""
        if match.always():
            return self.copy()
        return self._matches_inner(match)

    def _matches_inner(self, match):
        if match.always():
            return self.copy()

        visit = match.visitchildrenset(self._dir[:-1])
        if visit == b'all':
            return self.copy()
        ret = treemanifest(self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            # While visitchildrenset *usually* lists only subdirs, this is
            # actually up to the matcher and may have some files in the set().
            # If visit == 'this', we should obviously look at the files in this
            # directory; if visit is a set, and fn is in it, we should inspect
            # fn (but no need to inspect things not in the set).
            if visit != b'this' and fn not in visit:
                continue
            fullp = self._subpath(fn)
            # visitchildrenset isn't perfect, we still need to call the regular
            # matcher code to further filter results.
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        visit = self._loadchildrensetlazy(visit)
        for dir, subm in pycompat.iteritems(self._dirs):
            if visit and dir[:-1] not in visit:
                continue
            m = subm._matches_inner(match)
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret

    def fastdelta(self, base, changes):
        raise FastdeltaUnavailable()

    def diff(self, m2, match=None, clean=False):
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        result = {}
        emptytree = treemanifest()

        def _iterativediff(t1, t2, stack):
            """compares two tree manifests and appends to the stack the new
            tree manifests which still need to be compared"""
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)

            for d, m1 in pycompat.iteritems(t1._dirs):
                m2 = t2._dirs.get(d, emptytree)
                stack.append((m1, m2))

            for d, m2 in pycompat.iteritems(t2._dirs):
                if d not in t1._dirs:
                    stack.append((emptytree, m2))

            for fn, n1 in pycompat.iteritems(t1._files):
                fl1 = t1._flags.get(fn, b'')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, b'')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in pycompat.iteritems(t2._files):
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, b'')
                    result[t2._subpath(fn)] = ((None, b''), (n2, fl2))

        stackls = []
        _iterativediff(self, m2, stackls)
        while stackls:
            t1, t2 = stackls.pop()
            # stackls is populated in the function call
            _iterativediff(t1, t2, stackls)
        return result

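    # Design note: the explicit stack in diff() above replaces recursion,
    # so diffing very deep directory trees does not risk hitting Python's
    # recursion limit.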
    def unmodifiedsince(self, m2):
        return not self._dirty and not m2._dirty and self._node == m2._node

    def parse(self, text, readsubtree):
        selflazy = self._lazydirs
        for f, n, fl in _parse(text):
            if fl == b't':
                f = f + b'/'
                # False below means "doesn't need to be copied" and can use the
                # cached value from readsubtree directly.
                selflazy[f] = (n, readsubtree, False)
            elif b'/' in f:
                # This is a flat manifest, so use __setitem__ and setflag rather
                # than assigning directly to _files and _flags, so we can
                # assign a path in a subdirectory, and to mark dirty (compared
                # to nullid).
                self[f] = n
                if fl:
                    self.setflag(f, fl)
            else:
                # Assigning to _files and _flags avoids marking as dirty,
                # and should be a little faster.
                self._files[f] = n
                if fl:
                    self._flags[f] = fl

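    # parse() consumes lines of the form b'path\x00<hex node><flags>\n';
    # a trailing b't' flag marks a subdirectory entry, e.g.
    # b'dir\x00<40 hex>t\n' becomes a lazy entry for b'dir/'. (Line
    # format inferred from _parse() and the b't' handling above.)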
    def text(self):
        """Get the full data of this manifest as a bytestring."""
        self._load()
        return _text(self.iterentries())

    def dirtext(self):
        """Get the full data of this directory as a bytestring. Make sure that
        any submanifests have been written first, so their nodeids are correct.
        """
        self._load()
        flags = self.flags
        lazydirs = [
            (d[:-1], v[0], b't') for d, v in pycompat.iteritems(self._lazydirs)
        ]
        dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
        files = [(f, self._files[f], flags(f)) for f in self._files]
        return _text(sorted(dirs + files + lazydirs))

    def read(self, gettext, readsubtree):
        def _load_for_read(s):
            s.parse(gettext(), readsubtree)
            s._dirty = False

        self._loadfunc = _load_for_read

    def writesubtrees(self, m1, m2, writesubtree, match):
        self._load()  # for consistency; should never have any effect here
        m1._load()
        m2._load()
        emptytree = treemanifest()

        def getnode(m, d):
            ld = m._lazydirs.get(d)
            if ld:
                return ld[0]
            return m._dirs.get(d, emptytree)._node

        # let's skip investigating things that `match` says we do not need.
        visit = match.visitchildrenset(self._dir[:-1])
        visit = self._loadchildrensetlazy(visit)
        if visit == b'this' or visit == b'all':
            visit = None
        for d, subm in pycompat.iteritems(self._dirs):
            if visit and d[:-1] not in visit:
                continue
            subp1 = getnode(m1, d)
            subp2 = getnode(m2, d)
            if subp1 == nullid:
                subp1, subp2 = subp2, subp1
            writesubtree(subm, subp1, subp2, match)

    def walksubtrees(self, matcher=None):
        """Returns an iterator of the subtrees of this manifest, including this
        manifest itself.

        If `matcher` is provided, it only returns subtrees that match.
        """
        if matcher and not matcher.visitdir(self._dir[:-1]):
            return
        if not matcher or matcher(self._dir[:-1]):
            yield self

        self._load()
        # OPT: use visitchildrenset to avoid loading everything.
        self._loadalllazy()
        for d, subm in pycompat.iteritems(self._dirs):
            for subtree in subm.walksubtrees(matcher=matcher):
                yield subtree


class manifestfulltextcache(util.lrucachedict):
    """File-backed LRU cache for the manifest cache

    File consists of entries, up to EOF:

    - 20 bytes node, 4 bytes length, <length> manifest data

    These are written in reverse cache order (oldest to newest).

    """

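    # Each on-disk record is thus the raw 20-byte node, then
    # struct.pack(b'>L', len(data)), then the manifest data itself;
    # read() below stops at the first short or truncated record.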
    _file = b'manifestfulltextcache'

    def __init__(self, max):
        super(manifestfulltextcache, self).__init__(max)
        self._dirty = False
        self._read = False
        self._opener = None

    def read(self):
        if self._read or self._opener is None:
            return

        try:
            with self._opener(self._file) as fp:
                set = super(manifestfulltextcache, self).__setitem__
                # ignore trailing data, this is a cache, corruption is skipped
                while True:
                    # TODO do we need to do work here for sha1 portability?
                    node = fp.read(20)
                    if len(node) < 20:
                        break
                    try:
                        size = struct.unpack(b'>L', fp.read(4))[0]
                    except struct.error:
                        break
                    value = bytearray(fp.read(size))
                    if len(value) != size:
                        break
                    set(node, value)
        except IOError:
            # the file is allowed to be missing
            pass

        self._read = True
        self._dirty = False

    def write(self):
        if not self._dirty or self._opener is None:
            return
        # rotate backwards to the first used node
        try:
            with self._opener(
                self._file, b'w', atomictemp=True, checkambig=True
            ) as fp:
                node = self._head.prev
                while True:
                    if node.key in self._cache:
                        fp.write(node.key)
                        fp.write(struct.pack(b'>L', len(node.value)))
                        fp.write(node.value)
                    if node is self._head:
                        break
                    node = node.prev
        except IOError:
            # We could not write the cache (e.g. a permission error), or
            # the wcache directory may be missing.
            #
            # We could try harder and see if we could recreate a wcache
            # directory where we could write to.
            #
            # XXX the error passes silently; having some way to issue an
            # error log via `ui.log` would be nice.
            pass

    def __len__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__len__()

    def __contains__(self, k):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__contains__(k)

    def __iter__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__iter__()

    def __getitem__(self, k):
        if not self._read:
            self.read()
        # the cache lru order can change on read
        setdirty = self._cache.get(k) is not self._head
        value = super(manifestfulltextcache, self).__getitem__(k)
        if setdirty:
            self._dirty = True
        return value

    def __setitem__(self, k, v):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__setitem__(k, v)
        self._dirty = True

    def __delitem__(self, k):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__delitem__(k)
        self._dirty = True

    def get(self, k, default=None):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).get(k, default=default)

    def clear(self, clear_persisted_data=False):
        super(manifestfulltextcache, self).clear()
        if clear_persisted_data:
            self._dirty = True
            self.write()
        self._read = False


# an upper bound of what we expect from compression
# (real-life value seems to be "3")
MAXCOMPRESSION = 3


1542 class FastdeltaUnavailable(Exception):
1542 class FastdeltaUnavailable(Exception):
1543 """Exception raised when fastdelta isn't usable on a manifest."""
1543 """Exception raised when fastdelta isn't usable on a manifest."""
1544
1544
1545
1545
1546 @interfaceutil.implementer(repository.imanifeststorage)
1546 @interfaceutil.implementer(repository.imanifeststorage)
1547 class manifestrevlog(object):
1547 class manifestrevlog(object):
1548 """A revlog that stores manifest texts. This is responsible for caching the
1548 """A revlog that stores manifest texts. This is responsible for caching the
1549 full-text manifest contents.
1549 full-text manifest contents.
1550 """
1550 """
1551
1551
1552 def __init__(
1552 def __init__(
1553 self,
1553 self,
1554 opener,
1554 opener,
1555 tree=b'',
1555 tree=b'',
1556 dirlogcache=None,
1556 dirlogcache=None,
1557 indexfile=None,
1557 indexfile=None,
1558 treemanifest=False,
1558 treemanifest=False,
1559 ):
1559 ):
1560 """Constructs a new manifest revlog
1560 """Constructs a new manifest revlog
1561
1561
1562 `indexfile` - used by extensions to have two manifests at once, like
1562 `indexfile` - used by extensions to have two manifests at once, like
1563 when transitioning between flatmanifeset and treemanifests.
1563 when transitioning between flatmanifeset and treemanifests.
1564
1564
1565 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1565 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1566 options can also be used to make this a tree manifest revlog. The opener
1566 options can also be used to make this a tree manifest revlog. The opener
1567 option takes precedence, so if it is set to True, we ignore whatever
1567 option takes precedence, so if it is set to True, we ignore whatever
1568 value is passed in to the constructor.
1568 value is passed in to the constructor.
1569 """
1569 """
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        optiontreemanifest = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get(b'manifestcachesize', cachesize)
            optiontreemanifest = opts.get(b'treemanifest', False)

        self._treeondisk = optiontreemanifest or treemanifest

        self._fulltextcache = manifestfulltextcache(cachesize)

        if tree:
            assert self._treeondisk, b'opts is %r' % opts

        if indexfile is None:
            indexfile = b'00manifest.i'
            if tree:
                indexfile = b"meta/" + tree + indexfile

        self.tree = tree

        # The dirlogcache is kept on the root manifest log
        if tree:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {b'': self}

        self._revlog = revlog.revlog(
            opener,
            indexfile,
            # only root indexfile is cached
            checkambig=not bool(tree),
            mmaplargeindex=True,
            upperboundcomp=MAXCOMPRESSION,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
        )

        self.index = self._revlog.index
        self.version = self._revlog.version
        self._generaldelta = self._revlog._generaldelta
        self._revlog.revlog_kind = b'manifest'

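    # A minimal sketch of how the index file name above is derived (the
    # helper below is hypothetical, not part of this module):
    #
    #     def _mf_indexfile(tree=b''):
    #         indexfile = b'00manifest.i'
    #         if tree:
    #             indexfile = b"meta/" + tree + indexfile
    #         return indexfile
    #
    #     _mf_indexfile()             # => b'00manifest.i'
    #     _mf_indexfile(b'foo/bar/')  # => b'meta/foo/bar/00manifest.i'
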
    def _setupmanifestcachehooks(self, repo):
        """Persist the manifestfulltextcache on lock release"""
        if not util.safehasattr(repo, b'_wlockref'):
            return

        self._fulltextcache._opener = repo.wcachevfs
        if repo._currentlock(repo._wlockref) is None:
            return

        reporef = weakref.ref(repo)
        manifestrevlogref = weakref.ref(self)

        def persistmanifestcache(success):
            # Repo is in an unknown state, do not persist.
            if not success:
                return

            repo = reporef()
            self = manifestrevlogref()
            if repo is None or self is None:
                return
            if repo.manifestlog.getstorage(b'') is not self:
                # there's a different manifest in play now, abort
                return
            self._fulltextcache.write()

        repo._afterlock(persistmanifestcache)

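    # A rough standalone sketch (hypothetical names, not Mercurial API) of
    # the pattern used above: the callback holds only weak references, so
    # registering it does not keep the repo or this revlog alive after they
    # are released.
    #
    #     import weakref
    #
    #     class _Cache(object):
    #         def write(self):
    #             pass  # persist to disk
    #
    #     cache = _Cache()
    #     cacheref = weakref.ref(cache)
    #
    #     def on_lock_release(success):
    #         target = cacheref()  # None once the cache was garbage collected
    #         if success and target is not None:
    #             target.write()
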
    @property
    def fulltextcache(self):
        return self._fulltextcache

    def clearcaches(self, clear_persisted_data=False):
        self._revlog.clearcaches()
        self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
        self._dirlogcache = {self.tree: self}

    def dirlog(self, d):
        if d:
            assert self._treeondisk
        if d not in self._dirlogcache:
            mfrevlog = manifestrevlog(
                self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
            )
            self._dirlogcache[d] = mfrevlog
        return self._dirlogcache[d]

    def add(
        self,
        m,
        transaction,
        link,
        p1,
        p2,
        added,
        removed,
        readtree=None,
        match=None,
    ):
        """Add a manifest entry into the manifest log.

        input:

        m: the manifest dict we want to store
        transaction: the open transaction
        link: the linkrev to record for this revision
        p1: manifest-node of p1
        p2: manifest-node of p2
        added: files added/changed compared to the parent
        removed: files removed compared to the parent

        tree manifest input:

        readtree: a function to read a subtree
        match: a filematcher for the subpart of the tree manifest
        """
        try:
            if p1 not in self.fulltextcache:
                raise FastdeltaUnavailable()
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge(
                [(x, False) for x in sorted(added)],
                [(x, True) for x in sorted(removed)],
            )

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self._revlog.rev(p1), deltatext
            text = util.buffer(arraytext)
            rev = self._revlog.addrevision(
                text, transaction, link, p1, p2, cachedelta
            )
            n = self._revlog.node(rev)
        except FastdeltaUnavailable:
            # The first parent manifest isn't already loaded or the
            # manifest implementation doesn't support fastdelta, so
            # we'll just encode a fulltext of the manifest and pass
            # that through to the revlog layer, and let it handle the
            # delta process.
            if self._treeondisk:
                assert readtree, b"readtree must be set for treemanifest writes"
                assert match, b"match must be specified for treemanifest writes"
                m1 = readtree(self.tree, p1)
                m2 = readtree(self.tree, p2)
                n = self._addtree(
                    m, transaction, link, m1, m2, readtree, match=match
                )
                arraytext = None
            else:
                text = m.text()
                rev = self._revlog.addrevision(text, transaction, link, p1, p2)
                n = self._revlog.node(rev)
                arraytext = bytearray(text)

        if arraytext is not None:
            self.fulltextcache[n] = arraytext

        return n

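    # The heapq.merge() trick above yields a single path-ordered stream of
    # (path, is_removed) pairs. A standalone illustration with hypothetical
    # data (not Mercurial API):
    #
    #     import heapq
    #     added = [b'b.txt', b'a.txt']
    #     removed = [b'c.txt']
    #     work = heapq.merge(
    #         [(x, False) for x in sorted(added)],
    #         [(x, True) for x in sorted(removed)],
    #     )
    #     list(work)  # [(b'a.txt', False), (b'b.txt', False), (b'c.txt', True)]
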
    def _addtree(self, m, transaction, link, m1, m2, readtree, match):
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision.
        if self.tree != b'' and (
            m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
        ):
            return m.node()

        def writesubtree(subm, subp1, subp2, match):
            sublog = self.dirlog(subm.dir())
            sublog.add(
                subm,
                transaction,
                link,
                subp1,
                subp2,
                None,
                None,
                readtree=readtree,
                match=match,
            )

        m.writesubtrees(m1, m2, writesubtree, match)
        text = m.dirtext()
        n = None
        if self.tree != b'':
            # Double-check whether contents are unchanged compared to one
            # parent.
            if text == m1.dirtext():
                n = m1.node()
            elif text == m2.dirtext():
                n = m2.node()

        if not n:
            rev = self._revlog.addrevision(
                text, transaction, link, m1.node(), m2.node()
            )
            n = self._revlog.node(rev)

        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n

    def __len__(self):
        return len(self._revlog)

    def __iter__(self):
        return self._revlog.__iter__()

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, value):
        return self._revlog.lookup(value)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def parents(self, node):
        return self._revlog.parents(node)

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def checksize(self):
        return self._revlog.checksize()

    def revision(self, node, _df=None, raw=False):
        return self._revlog.revision(node, _df=_df, raw=raw)

    def rawdata(self, node, _df=None):
        return self._revlog.rawdata(node, _df=_df)

    def revdiff(self, rev1, rev2):
        return self._revlog.revdiff(rev1, rev2)

    def cmp(self, node, text):
        return self._revlog.cmp(node, text)

    def deltaparent(self, rev):
        return self._revlog.deltaparent(rev)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
        )

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        return self._revlog.addgroup(
            deltas,
            linkmapper,
            transaction,
            alwayscache=alwayscache,
            addrevisioncb=addrevisioncb,
            duplicaterevisioncb=duplicaterevisioncb,
        )

    def rawsize(self, rev):
        return self._revlog.rawsize(rev)

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)

    def files(self):
        return self._revlog.files()

    def clone(self, tr, destrevlog, **kwargs):
        if not isinstance(destrevlog, manifestrevlog):
            raise error.ProgrammingError(b'expected manifestrevlog to clone()')

        return self._revlog.clone(tr, destrevlog._revlog, **kwargs)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        return self._revlog.storageinfo(
            exclusivefiles=exclusivefiles,
            sharedfiles=sharedfiles,
            revisionscount=revisionscount,
            trackedsize=trackedsize,
            storedsize=storedsize,
        )

    @property
    def indexfile(self):
        return self._revlog.indexfile

    @indexfile.setter
    def indexfile(self, value):
        self._revlog.indexfile = value

    @property
    def opener(self):
        return self._revlog.opener

    @opener.setter
    def opener(self, value):
        self._revlog.opener = value


@interfaceutil.implementer(repository.imanifestlog)
class manifestlog(object):
    """A collection class representing the set of manifest snapshots
    referenced by commits in the repository.

    In this situation, 'manifest' refers to the abstract concept of a snapshot
    of the list of files in the given commit. Consumers of the output of this
    class do not care about the implementation details of the actual manifests
    they receive (i.e. tree or flat or lazily loaded, etc)."""

    def __init__(self, opener, repo, rootstore, narrowmatch):
        usetreemanifest = False
        cachesize = 4

        opts = getattr(opener, 'options', None)
        if opts is not None:
            usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
            cachesize = opts.get(b'manifestcachesize', cachesize)

        self._treemanifests = usetreemanifest

        self._rootstore = rootstore
        self._rootstore._setupmanifestcachehooks(repo)
        self._narrowmatch = narrowmatch

        # A cache of the manifestctx or treemanifestctx for each directory
        self._dirmancache = {}
        self._dirmancache[b''] = util.lrucachedict(cachesize)

        self._cachesize = cachesize

    def __getitem__(self, node):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.
        """
        return self.get(b'', node)

    def get(self, tree, node, verify=True):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.

        `verify` - if True an exception will be thrown if the node is not in
        the revlog
        """
        if node in self._dirmancache.get(tree, ()):
            return self._dirmancache[tree][node]

        if not self._narrowmatch.always():
            if not self._narrowmatch.visitdir(tree[:-1]):
                return excludeddirmanifestctx(tree, node)
        if tree:
            if self._rootstore._treeondisk:
                if verify:
                    # Side-effect is LookupError is raised if node doesn't
                    # exist.
                    self.getstorage(tree).rev(node)

                m = treemanifestctx(self, tree, node)
            else:
                raise error.Abort(
                    _(
                        b"cannot ask for manifest directory '%s' in a flat "
                        b"manifest"
                    )
                    % tree
                )
        else:
            if verify:
                # Side-effect is LookupError is raised if node doesn't exist.
                self._rootstore.rev(node)

            if self._treemanifests:
                m = treemanifestctx(self, b'', node)
            else:
                m = manifestctx(self, node)

        if node != nullid:
            mancache = self._dirmancache.get(tree)
            if not mancache:
                mancache = util.lrucachedict(self._cachesize)
                self._dirmancache[tree] = mancache
            mancache[node] = m
        return m

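    # The per-directory LRU caching above can be pictured with a small
    # sketch (util.lrucachedict behaves like a dict that evicts the least
    # recently used entry once `cachesize` items are stored; the keys below
    # are hypothetical):
    #
    #     cache = util.lrucachedict(2)
    #     cache[b'n1'] = 'ctx1'
    #     cache[b'n2'] = 'ctx2'
    #     cache[b'n3'] = 'ctx3'   # evicts b'n1', the least recently used
    #     b'n1' in cache          # => False
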
    def getstorage(self, tree):
        return self._rootstore.dirlog(tree)

    def clearcaches(self, clear_persisted_data=False):
        self._dirmancache.clear()
        self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)

    def rev(self, node):
        return self._rootstore.rev(node)

    def update_caches(self, transaction):
        return self._rootstore._revlog.update_caches(transaction=transaction)


@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memmanifestctx(object):
    def __init__(self, manifestlog):
        self._manifestlog = manifestlog
        self._manifestdict = manifestdict()

    def _storage(self):
        return self._manifestlog.getstorage(b'')

    def copy(self):
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    def read(self):
        return self._manifestdict

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        return self._storage().add(
            self._manifestdict,
            transaction,
            link,
            p1,
            p2,
            added,
            removed,
            match=match,
        )


@interfaceutil.implementer(repository.imanifestrevisionstored)
class manifestctx(object):
    """A class representing a single revision of a manifest, including its
    contents, its parent revs, and its linkrev.
    """

    def __init__(self, manifestlog, node):
        self._manifestlog = manifestlog
        self._data = None

        self._node = node

        # TODO: We eventually want p1, p2, and linkrev exposed on this class,
        # but let's add it later when something needs it and we can load it
        # lazily.
        # self.p1, self.p2 = store.parents(node)
        # rev = store.rev(node)
        # self.linkrev = store.linkrev(rev)

    def _storage(self):
        return self._manifestlog.getstorage(b'')

    def node(self):
        return self._node

    def copy(self):
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._storage().parents(self._node)

    def read(self):
        if self._data is None:
            if self._node == nullid:
                self._data = manifestdict()
            else:
                store = self._storage()
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = manifestdict(text)
        return self._data

    def readfast(self, shallow=False):
        """Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, nothing changes since this is a flat manifest.
        """
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        if deltaparent != nullrev and deltaparent in store.parentrevs(r):
            return self.readdelta()
        return self.read()

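    # Why the deltaparent check above pays off: when revision r is stored as
    # a delta against one of its own parents, reading just that delta
    # (readdelta) avoids reconstructing the full manifest text through a
    # whole delta chain (read). A sketch of the decision with hypothetical
    # values:
    #
    #     r = 12
    #     store.parentrevs(r)   # e.g. (11, -1)
    #     store.deltaparent(r)  # e.g. 11 -> readdelta() is the fast path
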
    def readdelta(self, shallow=False):
        """Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to
        read if the revlog delta is already against p1.

        Changing the value of `shallow` has no effect on flat manifests.
        """
        store = self._storage()
        r = store.rev(self._node)
        d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
        return manifestdict(d)

    def find(self, key):
        return self.read().find(key)


@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memtreemanifestctx(object):
    def __init__(self, manifestlog, dir=b''):
        self._manifestlog = manifestlog
        self._dir = dir
        self._treemanifest = treemanifest()

    def _storage(self):
        return self._manifestlog.getstorage(b'')

    def copy(self):
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self._treemanifest.copy()
        return memmf

    def read(self):
        return self._treemanifest

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        def readtree(dir, node):
            return self._manifestlog.get(dir, node).read()

        return self._storage().add(
            self._treemanifest,
            transaction,
            link,
            p1,
            p2,
            added,
            removed,
            readtree=readtree,
            match=match,
        )


@interfaceutil.implementer(repository.imanifestrevisionstored)
class treemanifestctx(object):
    def __init__(self, manifestlog, dir, node):
        self._manifestlog = manifestlog
        self._dir = dir
        self._data = None

        self._node = node

        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
        # we can instantiate treemanifestctx objects for directories we don't
        # have on disk.
        # self.p1, self.p2 = store.parents(node)
        # rev = store.rev(node)
        # self.linkrev = store.linkrev(rev)

    def _storage(self):
        narrowmatch = self._manifestlog._narrowmatch
        if not narrowmatch.always():
            if not narrowmatch.visitdir(self._dir[:-1]):
                return excludedmanifestrevlog(self._dir)
        return self._manifestlog.getstorage(self._dir)

    def read(self):
        if self._data is None:
            store = self._storage()
            if self._node == nullid:
                self._data = treemanifest()
            # TODO accessing non-public API
            elif store._treeondisk:
                m = treemanifest(dir=self._dir)

                def gettext():
                    return store.revision(self._node)

                def readsubtree(dir, subm):
                    # Set verify to False since we need to be able to create
                    # subtrees for trees that don't exist on disk.
                    return self._manifestlog.get(dir, subm, verify=False).read()

                m.read(gettext, readsubtree)
                m.setnode(self._node)
                self._data = m
            else:
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = treemanifest(dir=self._dir, text=text)

        return self._data

    def node(self):
        return self._node

    def copy(self):
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._storage().parents(self._node)

    def readdelta(self, shallow=False):
        """Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to
        read if the revlog delta is already against p1.

        If `shallow` is True, this will read the delta for this directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifest,
        i.e. the subdirectory will be reported among files and distinguished
        only by its 't' flag.
        """
        store = self._storage()
        if shallow:
            r = store.rev(self._node)
            d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
            return manifestdict(d)
        else:
            # Need to perform a slow delta
            r0 = store.deltaparent(store.rev(self._node))
            m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
            m1 = self.read()
            md = treemanifest(dir=self._dir)
            for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
                if n1:
                    md[f] = n1
                    if fl1:
                        md.setflag(f, fl1)
            return md

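    # The m0.diff(m1) mapping iterated above has the shape
    # {path: ((old_node, old_flag), (new_node, new_flag))}; entries removed
    # in m1 carry a None new_node and are therefore skipped by the `if n1:`
    # guard. A hypothetical entry:
    #
    #     {b'dir/f.txt': ((b'<node0>', b''), (b'<node1>', b'x'))}
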
    def readfast(self, shallow=False):
        """Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, it only returns the entries from this manifest,
        and not any submanifests.
        """
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        if deltaparent != nullrev and deltaparent in store.parentrevs(r):
            return self.readdelta(shallow=shallow)

        if shallow:
            return manifestdict(store.revision(self._node))
        else:
            return self.read()

    def find(self, key):
        return self.read().find(key)


class excludeddir(treemanifest):
    """Stand-in for a directory that is excluded from the repository.

    With narrowing active on a repository that uses treemanifests,
    some of the directory revlogs will be excluded from the resulting
    clone. This is a huge storage win for clients, but means we need
    some sort of pseudo-manifest to surface to internals so we can
    detect a merge conflict outside the narrowspec. That's what this
    class is: it stands in for a directory whose node is known, but
    whose contents are unknown.
    """

    def __init__(self, dir, node):
        super(excludeddir, self).__init__(dir)
        self._node = node
        # Add an empty file, which will be included by iterators and such,
        # appearing as the directory itself (i.e. something like "dir/")
        self._files[b''] = node
        self._flags[b''] = b't'

    # Manifests outside the narrowspec should never be modified, so avoid
    # copying. This makes a noticeable difference when there are very many
    # directories outside the narrowspec. Also, it makes sense for the copy
    # to be of the same type as the original, which would not happen with
    # the super type's copy().
    def copy(self):
        return self


class excludeddirmanifestctx(treemanifestctx):
    """context wrapper for excludeddir - see that docstring for rationale"""

    def __init__(self, dir, node):
        self._dir = dir
        self._node = node

    def read(self):
        return excludeddir(self._dir, self._node)

    def readfast(self, shallow=False):
        # special version of readfast since we don't have underlying storage
        return self.read()

    def write(self, *args):
        raise error.ProgrammingError(
            b'attempt to write manifest from excluded dir %s' % self._dir
        )


class excludedmanifestrevlog(manifestrevlog):
    """Stand-in for excluded treemanifest revlogs.

    When narrowing is active on a treemanifest repository, we'll have
    references to directories we can't see due to the revlog being
    skipped. This class exists to conform to the manifestrevlog
    interface for those directories and proactively prevent writes to
    outside the narrowspec.
    """

    def __init__(self, dir):
        self._dir = dir

    def __len__(self):
        raise error.ProgrammingError(
            b'attempt to get length of excluded dir %s' % self._dir
        )

    def rev(self, node):
        raise error.ProgrammingError(
            b'attempt to get rev from excluded dir %s' % self._dir
        )

    def linkrev(self, node):
        raise error.ProgrammingError(
            b'attempt to get linkrev from excluded dir %s' % self._dir
        )

    def node(self, rev):
        raise error.ProgrammingError(
            b'attempt to get node from excluded dir %s' % self._dir
        )

    def add(self, *args, **kwargs):
        # We should never write entries in dirlogs outside the narrow clone.
        # However, the method still gets called from writesubtree() in
        # _addtree(), so we need to handle it. We should possibly make
        # writesubtree() avoid calling add() with a clean manifest (_dirty is
        # always False in excludeddir instances).
        pass
@@ -1,3201 +1,3203 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)
from .pure import parsers as pureparsers

# Bare usage of all the names below to silence pyflakes warnings.
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


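# Roughly speaking, a flag processor such as the ellipsis one registered
# above is a (read, write, raw) triple: read/write return a
# (text, validatehash) pair and raw returns whether the raw text can be
# hash-checked as-is. A no-op sketch mirroring the shape of the ellipsis
# processors (hypothetical names, for illustration only):
#
#     def _noopread(rl, text):
#         return text, False      # text unchanged, hash not vouched for
#
#     def _noopwrite(rl, text):
#         return text, False
#
#     def _noopraw(rl, text):
#         return False            # rawtext hash cannot be verified directly
#
#     _noopprocessor = (_noopread, _noopwrite, _noopraw)

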
def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


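# offset_type() packs a file offset and a 16-bit flags field into a single
# integer (offset in the high bits, flags in the low 16 bits); getoffset()
# and gettype() undo the packing. A small worked example:
#
#     packed = offset_type(1024, 0)   # 1024 << 16 == 67108864
#     getoffset(packed)               # => 1024
#     gettype(packed)                 # => 0

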
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a
    hook point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider the pure Python implementation "fast" here, because
# people using pure Python don't really have performance concerns (and
# have a wheelbarrow of other slowness sources anyway).
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


174
175 @attr.s(slots=True, frozen=True)
175 @attr.s(slots=True, frozen=True)
176 class _revisioninfo(object):
176 class _revisioninfo(object):
177 """Information about a revision that allows building its fulltext
177 """Information about a revision that allows building its fulltext
178 node: expected hash of the revision
178 node: expected hash of the revision
179 p1, p2: parent revs of the revision
179 p1, p2: parent revs of the revision
180 btext: built text cache consisting of a one-element list
180 btext: built text cache consisting of a one-element list
181 cachedelta: (baserev, uncompressed_delta) or None
181 cachedelta: (baserev, uncompressed_delta) or None
182 flags: flags associated to the revision storage
182 flags: flags associated to the revision storage
183
183
184 One of btext[0] or cachedelta must be set.
184 One of btext[0] or cachedelta must be set.
185 """
185 """
186
186
187 node = attr.ib()
187 node = attr.ib()
188 p1 = attr.ib()
188 p1 = attr.ib()
189 p2 = attr.ib()
189 p2 = attr.ib()
190 btext = attr.ib()
190 btext = attr.ib()
191 textlen = attr.ib()
191 textlen = attr.ib()
192 cachedelta = attr.ib()
192 cachedelta = attr.ib()
193 flags = attr.ib()
193 flags = attr.ib()
194
194
195
195
196 @interfaceutil.implementer(repository.irevisiondelta)
196 @interfaceutil.implementer(repository.irevisiondelta)
197 @attr.s(slots=True)
197 @attr.s(slots=True)
198 class revlogrevisiondelta(object):
198 class revlogrevisiondelta(object):
199 node = attr.ib()
199 node = attr.ib()
200 p1node = attr.ib()
200 p1node = attr.ib()
201 p2node = attr.ib()
201 p2node = attr.ib()
202 basenode = attr.ib()
202 basenode = attr.ib()
203 flags = attr.ib()
203 flags = attr.ib()
204 baserevisionsize = attr.ib()
204 baserevisionsize = attr.ib()
205 revision = attr.ib()
205 revision = attr.ib()
206 delta = attr.ib()
206 delta = attr.ib()
207 sidedata = attr.ib()
207 sidedata = attr.ib()
208 linknode = attr.ib(default=None)
208 linknode = attr.ib(default=None)
209
209
210
210
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


219 # index v0:
219 # index v0:
220 # 4 bytes: offset
220 # 4 bytes: offset
221 # 4 bytes: compressed length
221 # 4 bytes: compressed length
222 # 4 bytes: base rev
222 # 4 bytes: base rev
223 # 4 bytes: link rev
223 # 4 bytes: link rev
224 # 20 bytes: parent 1 nodeid
224 # 20 bytes: parent 1 nodeid
225 # 20 bytes: parent 2 nodeid
225 # 20 bytes: parent 2 nodeid
226 # 20 bytes: nodeid
226 # 20 bytes: nodeid
227 indexformatv0 = struct.Struct(b">4l20s20s20s")
227 indexformatv0 = struct.Struct(b">4l20s20s20s")
228 indexformatv0_pack = indexformatv0.pack
228 indexformatv0_pack = indexformatv0.pack
229 indexformatv0_unpack = indexformatv0.unpack
229 indexformatv0_unpack = indexformatv0.unpack
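
# An illustrative sketch of the v0 record layout (assuming only the struct
# format above): four big-endian 4-byte integers followed by three 20-byte
# nodeids, for a fixed 76-byte entry that round-trips through pack/unpack:
#
#   >>> import struct
#   >>> fmt = struct.Struct(b">4l20s20s20s")
#   >>> fmt.size
#   76
#   >>> raw = fmt.pack(0, 12, 0, 0, b'\x11' * 20, b'\x22' * 20, b'\x33' * 20)
#   >>> fmt.unpack(raw)[1]  # compressed length field
#   12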


class revlogoldindex(list):
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exists in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
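
# An illustrative sketch of the "ng" layout (assuming only the struct format
# above): the first Q field packs a 6-byte offset into its high bits and the
# 2-byte flags into its low 16 bits, and the trailing 12x pads the 20-byte
# nodeid out to 32 bytes, for a fixed 64-byte entry:
#
#   >>> import struct
#   >>> fmt = struct.Struct(b">Qiiiiii20s12x")
#   >>> fmt.size
#   64
#   >>> first = (4096 << 16) | 0x0001  # offset 4096, one flag bit set
#   >>> raw = fmt.pack(first, 11, 22, 0, 0, -1, -1, b'\x11' * 20)
#   >>> raw[:8]
#   b'\x00\x00\x00\x00\x10\x00\x00\x01'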

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
indexformatv2_pack = indexformatv2.pack


class revlogv2io(object):
    def __init__(self):
        self.size = indexformatv2.size

    def parseindex(self, data, inline):
        index, cache = parsers.parse_index2(data, inline, revlogv2=True)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatv2_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


NodemapRevlogIO = None

if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    class NodemapRevlogIO(revlogio):
        """A debug-oriented IO class that returns a PersistentNodeMapIndexObject

        The PersistentNodeMapIndexObject object is meant to test the
        persistent nodemap feature.
        """

        def parseindex(self, data, inline):
            index, cache = parsers.parse_index_devel_nodemap(data, inline)
            return index, cache


class rustrevlogio(revlogio):
    def parseindex(self, data, inline):
        index, cache = super(rustrevlogio, self).parseindex(data, inline)
        return rustrevlog.MixedIndex(index), cache


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.nodemap_file = None
        if persistentnodemap:
            self.nodemap_file = nodemaputil.get_nodemap_file(
                opener, self.indexfile
            )

        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
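
        # An illustrative note on the check above (a standard bit trick, not
        # anything revlog-specific): a positive power of two has exactly one
        # bit set, so ``x & (x - 1)`` clears it and yields a falsy 0:
        #
        #   >>> 65536 & (65536 - 1)
        #   0
        #   >>> 65535 & (65535 - 1)  # not a power of two -> truthy
        #   65534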

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

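        # An illustrative note (nothing beyond the two masks above): the
        # version header keeps the format number in the low 16 bits and the
        # feature flags in the high bits, so e.g. an inline v1 revlog splits
        # as:
        #
        #   >>> versionflags = 0x00010001  # FLAG_INLINE_DATA | REVLOGV1
        #   >>> (versionflags & ~0xFFFF, versionflags & 0xFFFF)
        #   (65536, 1)
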
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self.nodemap_file
            and opts.get(b'devel-force-nodemap', False)
            and NodemapRevlogIO is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self.nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        elif fmt == REVLOGV2:
            self._io = revlogv2io()
        elif devel_nodemap:
            self._io = NodemapRevlogIO()
        elif use_rust_index:
            self._io = rustrevlogio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self.nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
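
        # An illustrative usage sketch (hypothetical caller; the names
        # ``rl``, ``existing_handle``, ``offset`` and ``length`` are
        # invented): _datareadfp() behaves as an ordinary context manager,
        # reusing an existing handle when one is supplied:
        #
        #   with rl._datareadfp(existing_handle) as fp:
        #       fp.seek(offset)
        #       chunk = fp.read(length)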

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self.nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self.nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
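
    # An illustrative note (just the inverse of the packing described above):
    # start() and flags() split the first index field back into its parts:
    #
    #   >>> packed = (4096 << 16) | 0x0001
    #   >>> (packed >> 16, packed & 0xFFFF)
    #   (4096, 1)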

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if self.version & 0xFFFF != REVLOGV2:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
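
    # An illustrative sketch of the pure-python walk above (toy data, not a
    # real index): with generaldelta, e[3] names an arbitrary earlier base
    # rev, and the loop follows it until a snapshot (base == rev):
    #
    #   bases = {2: 2, 5: 2, 9: 5}   # rev -> delta base; 2 is a snapshot
    #   chain, rev = [], 9
    #   while bases[rev] != rev:
    #       chain.append(rev)
    #       rev = bases[rev]
    #   chain.append(rev)
    #   chain.reverse()              # chain == [2, 5, 9], stopped == False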

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
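
    # A worked example (hypothetical linear history, not from the source):
    # for revs 0..4 with common=[node(1)] and heads=[node(4)], ``has`` ends
    # up containing {nullrev, 0, 1} and the missing list holds the nodes of
    # revs [2, 3, 4], already in topological order.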
1093
1093
1094 def incrementalmissingrevs(self, common=None):
1094 def incrementalmissingrevs(self, common=None):
1095 """Return an object that can be used to incrementally compute the
1095 """Return an object that can be used to incrementally compute the
1096 revision numbers of the ancestors of arbitrary sets that are not
1096 revision numbers of the ancestors of arbitrary sets that are not
1097 ancestors of common. This is an ancestor.incrementalmissingancestors
1097 ancestors of common. This is an ancestor.incrementalmissingancestors
1098 object.
1098 object.
1099
1099
1100 'common' is a list of revision numbers. If common is not supplied, uses
1100 'common' is a list of revision numbers. If common is not supplied, uses
1101 nullrev.
1101 nullrev.
1102 """
1102 """
1103 if common is None:
1103 if common is None:
1104 common = [nullrev]
1104 common = [nullrev]
1105
1105
1106 if rustancestor is not None:
1106 if rustancestor is not None:
1107 return rustancestor.MissingAncestors(self.index, common)
1107 return rustancestor.MissingAncestors(self.index, common)
1108 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1108 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1109
1109
1110 def findmissingrevs(self, common=None, heads=None):
1110 def findmissingrevs(self, common=None, heads=None):
1111 """Return the revision numbers of the ancestors of heads that
1111 """Return the revision numbers of the ancestors of heads that
1112 are not ancestors of common.
1112 are not ancestors of common.
1113
1113
1114 More specifically, return a list of revision numbers corresponding to
1114 More specifically, return a list of revision numbers corresponding to
1115 nodes N such that every N satisfies the following constraints:
1115 nodes N such that every N satisfies the following constraints:
1116
1116
1117 1. N is an ancestor of some node in 'heads'
1117 1. N is an ancestor of some node in 'heads'
1118 2. N is not an ancestor of any node in 'common'
1118 2. N is not an ancestor of any node in 'common'
1119
1119
1120 The list is sorted by revision number, meaning it is
1120 The list is sorted by revision number, meaning it is
1121 topologically sorted.
1121 topologically sorted.
1122
1122
1123 'heads' and 'common' are both lists of revision numbers. If heads is
1123 'heads' and 'common' are both lists of revision numbers. If heads is
1124 not supplied, uses all of the revlog's heads. If common is not
1124 not supplied, uses all of the revlog's heads. If common is not
1125 supplied, uses nullid."""
1125 supplied, uses nullid."""
1126 if common is None:
1126 if common is None:
1127 common = [nullrev]
1127 common = [nullrev]
1128 if heads is None:
1128 if heads is None:
1129 heads = self.headrevs()
1129 heads = self.headrevs()
1130
1130
1131 inc = self.incrementalmissingrevs(common=common)
1131 inc = self.incrementalmissingrevs(common=common)
1132 return inc.missingancestors(heads)
1132 return inc.missingancestors(heads)
1133
1133
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

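    # A hedged usage sketch (``rl`` is a hypothetical, already-open revlog
    # instance; the revision numbers are illustrative):
    #
    #     missing_revs = rl.findmissingrevs(common=[2], heads=[9])
    #     missing_nodes = rl.findmissing(common=[rl.node(2)])
    #
    # The two methods compute the same set; they differ only in accepting
    # and returning revision numbers versus binary node ids.
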
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant.  (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

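    # Hedged usage sketch for nodesbetween (``rl`` is a hypothetical revlog
    # instance):
    #
    #     nodes, outroots, outheads = rl.nodesbetween(
    #         roots=[rl.node(2)], heads=[rl.node(9)]
    #     )
    #
    # ``nodes`` then holds, in topological order, every node that is both a
    # descendant of rev 2 and an ancestor of rev 9, endpoints included.
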
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the
        # start (the extra slot at the end absorbs nullrev (-1) parents)
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

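    # The marking scheme above, as a standalone sketch over a hypothetical
    # list of (p1, p2) parent-revision pairs:
    #
    #     ishead = [1] * len(parents) + [0]  # trailing slot absorbs nullrev
    #     for p1, p2 in parents:
    #         ishead[p1] = ishead[p2] = 0    # a parent is never a head
    #     heads = [r for r, v in enumerate(ishead) if v]
    #
    # every revision starts out as a candidate head and is demoted the
    # moment it is seen as somebody's parent.
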
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

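    # For example (``rl`` being a hypothetical revlog instance):
    #
    #     rl.heads()                    # every topological head
    #     rl.heads(start=rl.node(3))    # only heads descending from rev 3
    #     rl.heads(stop=[rl.node(7)])   # walk treats rev 7 as childless
    #
    # With neither argument, the fast headrevs() path above is taken.
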
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

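    # The early returns above encode cheap orderings: nullrev precedes
    # everything, a revision is its own ancestor, and an ancestor always
    # carries a smaller revision number than its descendants, so ``a > b``
    # can never be an ancestor relation. Only the remaining case pays for
    # a reachableroots walk, e.g. (hypothetical ``rl``):
    #
    #     rl.isancestorrev(2, 9)   # walks rev 9's ancestry down to rev 2
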
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

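    # Hedged example: for a node whose hex form is b'1f3a...' with no other
    # node sharing the b'1f3' prefix, ``rl.shortest(node)`` (hypothetical
    # ``rl``) would return b'1f3'. All-'f' prefixes are rejected by
    # disambiguate() because they are ambiguous with the working directory
    # pseudo-id (wdirid).
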
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

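    # The window arithmetic above rounds the request to cache-size-aligned
    # boundaries (``_chunkcachesize`` is a power of two). For instance,
    # with a 65536-byte cache size, a 100-byte read at offset 70000 gives:
    #
    #     realoffset = 70000 & ~(65536 - 1)                        # 65536
    #     reallength = ((70000 + 100 + 65536) & ~(65536 - 1)) - realoffset
    #                                                              # 65536
    #
    # i.e. one aligned 64k read that covers the requested bytes plus
    # surrounding data for the cache.
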
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is used, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

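    # In the inline format, index entries and data chunks are interleaved
    # in a single file, so ``(rev + 1) * self._io.size`` bytes of index
    # data precede revision ``rev``'s chunk. A hedged example with the
    # usual 64-byte index entry size: if ``self.start(5)`` is 1000, the
    # physical offset of rev 5's data is 1000 + 6 * 64 = 1384.
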
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

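    # Sketch of the recursion for sparse revlogs: in a delta chain such as
    #
    #     rev A: full snapshot         (base == nullrev)
    #     rev B: intermediate snapshot (base == A, and A is not a parent of B)
    #     rev C: regular delta         (base == p1(C))
    #
    # A and B are snapshots while C is not, because C's delta parent is one
    # of its actual parents.
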
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.version & 0xFFFF == REVLOGV2:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

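    # Callers typically unpack the returned pair, e.g. (hypothetical
    # ``rl``):
    #
    #     text, sidedata = rl._revisiondata(node)
    #
    # ``revision()`` and ``rawdata()`` keep element 0 while ``sidedata()``
    # keeps element 1; only REVLOGV2 revlogs ever populate the sidedata
    # map.
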
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self._io.size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

2012 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2012 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2013 """Check node hash integrity.
2013 """Check node hash integrity.
2014
2014
2015 Available as a function so that subclasses can extend hash mismatch
2015 Available as a function so that subclasses can extend hash mismatch
2016 behaviors as needed.
2016 behaviors as needed.
2017 """
2017 """
2018 try:
2018 try:
2019 if p1 is None and p2 is None:
2019 if p1 is None and p2 is None:
2020 p1, p2 = self.parents(node)
2020 p1, p2 = self.parents(node)
2021 if node != self.hash(text, p1, p2):
2021 if node != self.hash(text, p1, p2):
2022 # Clear the revision cache on hash failure. The revision cache
2022 # Clear the revision cache on hash failure. The revision cache
2023 # only stores the raw revision and clearing the cache does have
2023 # only stores the raw revision and clearing the cache does have
2024 # the side-effect that we won't have a cache hit when the raw
2024 # the side-effect that we won't have a cache hit when the raw
2025 # revision data is accessed. But this case should be rare and
2025 # revision data is accessed. But this case should be rare and
2026 # it is extra work to teach the cache about the hash
2026 # it is extra work to teach the cache about the hash
2027 # verification state.
2027 # verification state.
2028 if self._revisioncache and self._revisioncache[0] == node:
2028 if self._revisioncache and self._revisioncache[0] == node:
2029 self._revisioncache = None
2029 self._revisioncache = None
2030
2030
2031 revornode = rev
2031 revornode = rev
2032 if revornode is None:
2032 if revornode is None:
2033 revornode = templatefilters.short(hex(node))
2033 revornode = templatefilters.short(hex(node))
2034 raise error.RevlogError(
2034 raise error.RevlogError(
2035 _(b"integrity check failed on %s:%s")
2035 _(b"integrity check failed on %s:%s")
2036 % (self.indexfile, pycompat.bytestr(revornode))
2036 % (self.indexfile, pycompat.bytestr(revornode))
2037 )
2037 )
2038 except error.RevlogError:
2038 except error.RevlogError:
2039 if self._censorable and storageutil.iscensoredtext(text):
2039 if self._censorable and storageutil.iscensoredtext(text):
2040 raise error.CensoredNodeError(self.indexfile, node, text)
2040 raise error.CensoredNodeError(self.indexfile, node, text)
2041 raise
2041 raise
2042
2042
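    # Editor's sketch of the scheme storageutil.hashrevisionsha1 implements
    # (ignoring its caching fast path): a SHA-1 over both parent nodes,
    # smaller one first, followed by the raw text.
    #
    #   import hashlib
    #
    #   def node_sha1(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
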
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

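    # The trigger condition above, with toy numbers (editor's sketch,
    # assuming the module-level _maxinline cap, 131072 bytes at the time
    # of writing):
    #
    #   tip_start, tip_length = 130000, 2000
    #   must_split = (tip_start + tip_length) >= 131072  # True -> split .i/.d
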
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

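    # Hypothetical caller, for orientation (editor's sketch; names are
    # illustrative). Within an open transaction, addrevision() deduplicates
    # by node, so re-adding existing content returns the existing revision
    # number without writing anything.
    #
    #   def store_file_revision(rl, tr, data, linkrev, p1, p2):
    #       rev = rl.addrevision(data, tr, linkrev, p1, p2)
    #       return rl.node(rev)
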
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

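    # Editor's sketch of the header-byte dispatch described above; the
    # return labels are illustrative, not part of the revlog API.
    #
    #   def classify_chunk(data):
    #       t = data[0:1]
    #       if t == b'x':   # 0x78, first byte of a zlib stream
    #           return 'zlib'
    #       if t == b'\0':  # chunk stored verbatim, returned as-is
    #           return 'raw'
    #       if t == b'u':   # uncompressed, one-byte marker to strip
    #           return 'uncompressed'
    #       return 'engine'  # resolved via util.compengines.forrevlogheader
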
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self._io.size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self._io.size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            e = e[:8]

        self.index.append(e)
        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

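    # For orientation, the ten index fields assembled above, with made-up
    # values (editor's sketch; offset_type packs the data offset into the
    # high bits and the 16 flag bits into the low bits):
    #
    #   entry = (
    #       offset_type(4096, 0),  # [0] data offset + flag bits
    #       120,    # [1] length of the stored (compressed) chunk
    #       900,    # [2] uncompressed revision text length
    #       41,     # [3] delta base revision
    #       57,     # [4] linkrev
    #       41,     # [5] first parent revision
    #       -1,     # [6] second parent revision (nullrev)
    #       node,   # [7] 20-byte node id
    #       4216,   # [8] sidedata offset (0 when there is no sidedata)
    #       32,     # [9] serialized sidedata length
    #   )
    #   # pre-v2 revlogs keep only the first eight fields: entry[:8]
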
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self.version & 0xFFFF != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

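    # Toy walk-through of the v2 scan above (editor's sketch). Rev 1's
    # sidedata was rewritten past rev 2's data, so the next write offset
    # must clear it even though rev 2 is the tip:
    #
    #   # (data_end, sidedata_offset, sidedata_size) per revision
    #   revs = [(100, 0, 0), (180, 220, 30), (220, 0, 0)]
    #   offset = 0
    #   for data_end, sd_off, sd_size in revs:
    #       offset = max(offset, data_end, sd_off + sd_size)
    #   # offset == 250, not 220
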
    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to a position before the end. See issue4943
        # for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

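    # The workaround in miniature (editor's sketch): any a+ handle that may
    # have been repositioned by a read is explicitly seeked to EOF so the
    # write is a true append on every platform.
    #
    #   import os
    #
    #   def append_bytes(fh, payload):
    #       fh.seek(0, os.SEEK_END)
    #       fh.write(payload)
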
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision number that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()
        return not empty

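    # Shape of one entry in ``deltas`` as unpacked above (editor's sketch,
    # placeholder values):
    #
    #   node = p1 = p2 = linknode = deltabase = b'\x00' * 20
    #   delta, flags, sidedata = b'', 0, {}
    #   entry = (
    #       node,       # node id of the incoming revision
    #       p1, p2,     # parent node ids
    #       linknode,   # mapped to a linkrev via linkmapper()
    #       deltabase,  # node the delta applies against
    #       delta,      # the binary patch itself
    #       flags,      # 0 falls back to REVIDX_DEFAULT_FLAGS
    #       sidedata,   # sidedata mapping for the revision
    #   )
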
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

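    # Truncation offsets computed above, with toy numbers (editor's sketch;
    # entry_size stands in for self._io.size):
    #
    #   entry_size = 64
    #   rev = 10           # first revision to strip
    #   data_start = 8192  # self.start(rev)
    #   # separate .i/.d: truncate .d at 8192, .i at rev * entry_size = 640
    #   # inline: truncate .i at data_start + rev * entry_size = 8832
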
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

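    # Hypothetical use (editor's sketch): a verify-style caller treating any
    # nonzero result as trailing garbage in the corresponding file.
    #
    #   dd, di = rl.checksize()
    #   if (dd, di) != (0, 0):
    #       print('%d stray data bytes, %d stray index bytes' % (dd, di))
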
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When None, the destination revlog's current setting is
        used.

        If not None, `sidedatacompanion` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a quintet that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update, new_flags, dropped_flags)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        * `new_flags` is a bitfield of new flags that the revision should get
        * `dropped_flags` is a bitfield of flags that the revision should no
          longer have
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

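    # A companion matching the contract above (editor's sketch; the sidedata
    # key is hypothetical):
    #
    #   def drop_one_key_companion(srcrevlog, rev):
    #       dropall = False
    #       filterout = {b'example-key'}  # hypothetical key to remove
    #       update = {}                   # nothing added
    #       new_flags = dropped_flags = 0
    #       return (dropall, filterout, update, new_flags, dropped_flags)
    #
    #   # srcrevlog.clone(tr, destrevlog,
    #   #                 sidedatacompanion=drop_one_key_companion)
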
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {}, 0, 0)
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall = sidedataactions[0]
                filterout = sidedataactions[1]
                update = sidedataactions[2]
                new_flags = sidedataactions[3]
                dropped_flags = sidedataactions[4]
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None

                flags |= new_flags
                flags &= ~dropped_flags

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

2984 def censorrevision(self, tr, censornode, tombstone=b''):
2986 def censorrevision(self, tr, censornode, tombstone=b''):
2985 if (self.version & 0xFFFF) == REVLOGV0:
2987 if (self.version & 0xFFFF) == REVLOGV0:
2986 raise error.RevlogError(
2988 raise error.RevlogError(
2987 _(b'cannot censor with version %d revlogs') % self.version
2989 _(b'cannot censor with version %d revlogs') % self.version
2988 )
2990 )
2989
2991
2990 censorrev = self.rev(censornode)
2992 censorrev = self.rev(censornode)
2991 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2993 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2992
2994
2993 if len(tombstone) > self.rawsize(censorrev):
2995 if len(tombstone) > self.rawsize(censorrev):
2994 raise error.Abort(
2996 raise error.Abort(
2995 _(b'censor tombstone must be no longer than censored data')
2997 _(b'censor tombstone must be no longer than censored data')
2996 )
2998 )
2997
2999
2998 # Rewriting the revlog in place is hard. Our strategy for censoring is
3000 # Rewriting the revlog in place is hard. Our strategy for censoring is
2999 # to create a new revlog, copy all revisions to it, then replace the
3001 # to create a new revlog, copy all revisions to it, then replace the
3000 # revlogs on transaction close.
3002 # revlogs on transaction close.
3001
3003
3002 newindexfile = self.indexfile + b'.tmpcensored'
3004 newindexfile = self.indexfile + b'.tmpcensored'
3003 newdatafile = self.datafile + b'.tmpcensored'
3005 newdatafile = self.datafile + b'.tmpcensored'
3004
3006
3005 # This is a bit dangerous. We could easily have a mismatch of state.
3007 # This is a bit dangerous. We could easily have a mismatch of state.
3006 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3008 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3007 newrl.version = self.version
3009 newrl.version = self.version
3008 newrl._generaldelta = self._generaldelta
3010 newrl._generaldelta = self._generaldelta
3009 newrl._io = self._io
3011 newrl._io = self._io
3010
3012
3011 for rev in self.revs():
3013 for rev in self.revs():
3012 node = self.node(rev)
3014 node = self.node(rev)
3013 p1, p2 = self.parents(node)
3015 p1, p2 = self.parents(node)
3014
3016
3015 if rev == censorrev:
3017 if rev == censorrev:
3016 newrl.addrawrevision(
3018 newrl.addrawrevision(
3017 tombstone,
3019 tombstone,
3018 tr,
3020 tr,
3019 self.linkrev(censorrev),
3021 self.linkrev(censorrev),
3020 p1,
3022 p1,
3021 p2,
3023 p2,
3022 censornode,
3024 censornode,
3023 REVIDX_ISCENSORED,
3025 REVIDX_ISCENSORED,
3024 )
3026 )
3025
3027
3026 if newrl.deltaparent(rev) != nullrev:
3028 if newrl.deltaparent(rev) != nullrev:
3027 raise error.Abort(
3029 raise error.Abort(
3028 _(
3030 _(
3029 b'censored revision stored as delta; '
3031 b'censored revision stored as delta; '
3030 b'cannot censor'
3032 b'cannot censor'
3031 ),
3033 ),
3032 hint=_(
3034 hint=_(
3033 b'censoring of revlogs is not '
3035 b'censoring of revlogs is not '
3034 b'fully implemented; please report '
3036 b'fully implemented; please report '
3035 b'this bug'
3037 b'this bug'
3036 ),
3038 ),
3037 )
3039 )
3038 continue
3040 continue
3039
3041
3040 if self.iscensored(rev):
3042 if self.iscensored(rev):
3041 if self.deltaparent(rev) != nullrev:
3043 if self.deltaparent(rev) != nullrev:
3042 raise error.Abort(
3044 raise error.Abort(
3043 _(
3045 _(
3044 b'cannot censor due to censored '
3046 b'cannot censor due to censored '
3045 b'revision having delta stored'
3047 b'revision having delta stored'
3046 )
3048 )
3047 )
3049 )
3048 rawtext = self._chunk(rev)
3050 rawtext = self._chunk(rev)
3049 else:
3051 else:
3050 rawtext = self.rawdata(rev)
3052 rawtext = self.rawdata(rev)
3051
3053
3052 newrl.addrawrevision(
3054 newrl.addrawrevision(
3053 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3055 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3054 )
3056 )
3055
3057
3056 tr.addbackup(self.indexfile, location=b'store')
3058 tr.addbackup(self.indexfile, location=b'store')
3057 if not self._inline:
3059 if not self._inline:
3058 tr.addbackup(self.datafile, location=b'store')
3060 tr.addbackup(self.datafile, location=b'store')
3059
3061
3060 self.opener.rename(newrl.indexfile, self.indexfile)
3062 self.opener.rename(newrl.indexfile, self.indexfile)
3061 if not self._inline:
3063 if not self._inline:
3062 self.opener.rename(newrl.datafile, self.datafile)
3064 self.opener.rename(newrl.datafile, self.datafile)
3063
3065
3064 self.clearcaches()
3066 self.clearcaches()
3065 self._loadindex()
3067 self._loadindex()
3066
3068
3067 def verifyintegrity(self, state):
3069 def verifyintegrity(self, state):
3068 """Verifies the integrity of the revlog.
3070 """Verifies the integrity of the revlog.
3069
3071
3070 Yields ``revlogproblem`` instances describing problems that are
3072 Yields ``revlogproblem`` instances describing problems that are
3071 found.
3073 found.
3072 """
3074 """
3073 dd, di = self.checksize()
3075 dd, di = self.checksize()
3074 if dd:
3076 if dd:
3075 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3077 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3076 if di:
3078 if di:
3077 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3079 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3078
3080
3079 version = self.version & 0xFFFF
3081 version = self.version & 0xFFFF
3080
3082
3081 # The verifier tells us what version revlog we should be.
3083 # The verifier tells us what version revlog we should be.
3082 if version != state[b'expectedversion']:
3084 if version != state[b'expectedversion']:
3083 yield revlogproblem(
3085 yield revlogproblem(
3084 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3086 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3085 % (self.indexfile, version, state[b'expectedversion'])
3087 % (self.indexfile, version, state[b'expectedversion'])
3086 )
3088 )
3087
3089
3088 state[b'skipread'] = set()
3090 state[b'skipread'] = set()
3089 state[b'safe_renamed'] = set()
3091 state[b'safe_renamed'] = set()
3090
3092
3091 for rev in self:
3093 for rev in self:
3092 node = self.node(rev)
3094 node = self.node(rev)
3093
3095
3094 # Verify contents. 4 cases to care about:
3096 # Verify contents. 4 cases to care about:
3095 #
3097 #
3096 # common: the most common case
3098 # common: the most common case
3097 # rename: with a rename
3099 # rename: with a rename
3098 # meta: file content starts with b'\1\n', the metadata
3100 # meta: file content starts with b'\1\n', the metadata
3099 # header defined in filelog.py, but without a rename
3101 # header defined in filelog.py, but without a rename
3100 # ext: content stored externally
3102 # ext: content stored externally
3101 #
3103 #
3102 # More formally, their differences are shown below:
3104 # More formally, their differences are shown below:
3103 #
3105 #
3104 # | common | rename | meta | ext
3106 # | common | rename | meta | ext
3105 # -------------------------------------------------------
3107 # -------------------------------------------------------
3106 # flags() | 0 | 0 | 0 | not 0
3108 # flags() | 0 | 0 | 0 | not 0
3107 # renamed() | False | True | False | ?
3109 # renamed() | False | True | False | ?
3108 # rawtext[0:2]=='\1\n'| False | True | True | ?
3110 # rawtext[0:2]=='\1\n'| False | True | True | ?
3109 #
3111 #
3110 # "rawtext" means the raw text stored in revlog data, which
3112 # "rawtext" means the raw text stored in revlog data, which
3111 # could be retrieved by "rawdata(rev)". "text"
3113 # could be retrieved by "rawdata(rev)". "text"
3112 # mentioned below is "revision(rev)".
3114 # mentioned below is "revision(rev)".
3113 #
3115 #
3114 # There are 3 different lengths stored physically:
3116 # There are 3 different lengths stored physically:
3115 # 1. L1: rawsize, stored in revlog index
3117 # 1. L1: rawsize, stored in revlog index
3116 # 2. L2: len(rawtext), stored in revlog data
3118 # 2. L2: len(rawtext), stored in revlog data
3117 # 3. L3: len(text), stored in revlog data if flags==0, or
3119 # 3. L3: len(text), stored in revlog data if flags==0, or
3118 # possibly somewhere else if flags!=0
3120 # possibly somewhere else if flags!=0
3119 #
3121 #
3120 # L1 should be equal to L2. L3 could be different from them.
3122 # L1 should be equal to L2. L3 could be different from them.
3121 # "text" may or may not affect commit hash depending on flag
3123 # "text" may or may not affect commit hash depending on flag
3122 # processors (see flagutil.addflagprocessor).
3124 # processors (see flagutil.addflagprocessor).
3123 #
3125 #
3124 # | common | rename | meta | ext
3126 # | common | rename | meta | ext
3125 # -------------------------------------------------
3127 # -------------------------------------------------
3126 # rawsize() | L1 | L1 | L1 | L1
3128 # rawsize() | L1 | L1 | L1 | L1
3127 # size() | L1 | L2-LM | L1(*) | L1 (?)
3129 # size() | L1 | L2-LM | L1(*) | L1 (?)
3128 # len(rawtext) | L2 | L2 | L2 | L2
3130 # len(rawtext) | L2 | L2 | L2 | L2
3129 # len(text) | L2 | L2 | L2 | L3
3131 # len(text) | L2 | L2 | L2 | L3
3130 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3132 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3131 #
3133 #
3132 # LM: length of metadata, depending on rawtext
3134 # LM: length of metadata, depending on rawtext
3133 # (*): not ideal, see comment in filelog.size
3135 # (*): not ideal, see comment in filelog.size
3134 # (?): could be "- len(meta)" if the resolved content has
3136 # (?): could be "- len(meta)" if the resolved content has
3135 # rename metadata
3137 # rename metadata
3136 #
3138 #
3137 # Checks needed to be done:
3139 # Checks needed to be done:
3138 # 1. length check: L1 == L2, in all cases.
3140 # 1. length check: L1 == L2, in all cases.
3139 # 2. hash check: depending on flag processor, we may need to
3141 # 2. hash check: depending on flag processor, we may need to
3140 # use either "text" (external), or "rawtext" (in revlog).
3142 # use either "text" (external), or "rawtext" (in revlog).
3141
3143
3142 try:
3144 try:
3143 skipflags = state.get(b'skipflags', 0)
3145 skipflags = state.get(b'skipflags', 0)
3144 if skipflags:
3146 if skipflags:
3145 skipflags &= self.flags(rev)
3147 skipflags &= self.flags(rev)
3146
3148
3147 _verify_revision(self, skipflags, state, node)
3149 _verify_revision(self, skipflags, state, node)
3148
3150
3149 l1 = self.rawsize(rev)
3151 l1 = self.rawsize(rev)
3150 l2 = len(self.rawdata(node))
3152 l2 = len(self.rawdata(node))
3151
3153
3152 if l1 != l2:
3154 if l1 != l2:
3153 yield revlogproblem(
3155 yield revlogproblem(
3154 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3156 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3155 node=node,
3157 node=node,
3156 )
3158 )
3157
3159
3158 except error.CensoredNodeError:
3160 except error.CensoredNodeError:
3159 if state[b'erroroncensored']:
3161 if state[b'erroroncensored']:
3160 yield revlogproblem(
3162 yield revlogproblem(
3161 error=_(b'censored file data'), node=node
3163 error=_(b'censored file data'), node=node
3162 )
3164 )
3163 state[b'skipread'].add(node)
3165 state[b'skipread'].add(node)
3164 except Exception as e:
3166 except Exception as e:
3165 yield revlogproblem(
3167 yield revlogproblem(
3166 error=_(b'unpacking %s: %s')
3168 error=_(b'unpacking %s: %s')
3167 % (short(node), stringutil.forcebytestr(e)),
3169 % (short(node), stringutil.forcebytestr(e)),
3168 node=node,
3170 node=node,
3169 )
3171 )
3170 state[b'skipread'].add(node)
3172 state[b'skipread'].add(node)
3171
3173
3172 def storageinfo(
3174 def storageinfo(
3173 self,
3175 self,
3174 exclusivefiles=False,
3176 exclusivefiles=False,
3175 sharedfiles=False,
3177 sharedfiles=False,
3176 revisionscount=False,
3178 revisionscount=False,
3177 trackedsize=False,
3179 trackedsize=False,
3178 storedsize=False,
3180 storedsize=False,
3179 ):
3181 ):
3180 d = {}
3182 d = {}
3181
3183
3182 if exclusivefiles:
3184 if exclusivefiles:
3183 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3185 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3184 if not self._inline:
3186 if not self._inline:
3185 d[b'exclusivefiles'].append((self.opener, self.datafile))
3187 d[b'exclusivefiles'].append((self.opener, self.datafile))
3186
3188
3187 if sharedfiles:
3189 if sharedfiles:
3188 d[b'sharedfiles'] = []
3190 d[b'sharedfiles'] = []
3189
3191
3190 if revisionscount:
3192 if revisionscount:
3191 d[b'revisionscount'] = len(self)
3193 d[b'revisionscount'] = len(self)
3192
3194
3193 if trackedsize:
3195 if trackedsize:
3194 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3196 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3195
3197
3196 if storedsize:
3198 if storedsize:
3197 d[b'storedsize'] = sum(
3199 d[b'storedsize'] = sum(
3198 self.opener.stat(path).st_size for path in self.files()
3200 self.opener.stat(path).st_size for path in self.files()
3199 )
3201 )
3200
3202
3201 return d
3203 return d
@@ -1,513 +1,560 b''
1 # storageutil.py - Storage functionality agnostic of backend implementation.
1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import re
10 import re
11 import struct
11 import struct
12
12
13 from ..i18n import _
13 from ..i18n import _
14 from ..node import (
14 from ..node import (
15 bin,
15 bin,
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19 from .. import (
19 from .. import (
20 dagop,
20 dagop,
21 error,
21 error,
22 mdiff,
22 mdiff,
23 pycompat,
23 pycompat,
24 )
24 )
25 from ..interfaces import repository
25 from ..interfaces import repository
26 from ..revlogutils import sidedata as sidedatamod
26 from ..utils import hashutil
27 from ..utils import hashutil
27
28
28 _nullhash = hashutil.sha1(nullid)
29 _nullhash = hashutil.sha1(nullid)
29
30
30
31
31 def hashrevisionsha1(text, p1, p2):
32 def hashrevisionsha1(text, p1, p2):
32 """Compute the SHA-1 for revision data and its parents.
33 """Compute the SHA-1 for revision data and its parents.
33
34
34 This hash combines both the current file contents and its history
35 This hash combines both the current file contents and its history
35 in a manner that makes it easy to distinguish nodes with the same
36 in a manner that makes it easy to distinguish nodes with the same
36 content in the revision graph.
37 content in the revision graph.
37 """
38 """
38 # As of now, if one of the parent nodes is null, p2 is null
39 # As of now, if one of the parent nodes is null, p2 is null
39 if p2 == nullid:
40 if p2 == nullid:
40 # deep copy of a hash is faster than creating one
41 # deep copy of a hash is faster than creating one
41 s = _nullhash.copy()
42 s = _nullhash.copy()
42 s.update(p1)
43 s.update(p1)
43 else:
44 else:
44 # none of the parent nodes are nullid
45 # none of the parent nodes are nullid
45 if p1 < p2:
46 if p1 < p2:
46 a = p1
47 a = p1
47 b = p2
48 b = p2
48 else:
49 else:
49 a = p2
50 a = p2
50 b = p1
51 b = p1
51 s = hashutil.sha1(a)
52 s = hashutil.sha1(a)
52 s.update(b)
53 s.update(b)
53 s.update(text)
54 s.update(text)
54 return s.digest()
55 return s.digest()
55
56
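A minimal, self-contained sketch of the scheme above, using hashlib directly rather than Mercurial's hashutil (nullid is assumed to be the 20 zero bytes):

import hashlib

nullid = b'\x00' * 20  # assumed: Mercurial's null node

def node_for(text, p1, p2=nullid):
    # parents are hashed in sorted order, then the text
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

root = node_for(b'hello\n', nullid)   # root revision: both parents null
child = node_for(b'hello\n', root)    # same text, p1 = root
assert root != child                  # history changes the node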
56
57
57 METADATA_RE = re.compile(b'\x01\n')
58 METADATA_RE = re.compile(b'\x01\n')
58
59
59
60
60 def parsemeta(text):
61 def parsemeta(text):
61 """Parse metadata header from revision data.
62 """Parse metadata header from revision data.
62
63
63 Returns a 2-tuple of (metadata, offset), where both can be None if there
64 Returns a 2-tuple of (metadata, offset), where both can be None if there
64 is no metadata.
65 is no metadata.
65 """
66 """
66 # text can be buffer, so we can't use .startswith or .index
67 # text can be buffer, so we can't use .startswith or .index
67 if text[:2] != b'\x01\n':
68 if text[:2] != b'\x01\n':
68 return None, None
69 return None, None
69 s = METADATA_RE.search(text, 2).start()
70 s = METADATA_RE.search(text, 2).start()
70 mtext = text[2:s]
71 mtext = text[2:s]
71 meta = {}
72 meta = {}
72 for l in mtext.splitlines():
73 for l in mtext.splitlines():
73 k, v = l.split(b': ', 1)
74 k, v = l.split(b': ', 1)
74 meta[k] = v
75 meta[k] = v
75 return meta, s + 2
76 return meta, s + 2
76
77
77
78
78 def packmeta(meta, text):
79 def packmeta(meta, text):
79 """Add metadata to fulltext to produce revision text."""
80 """Add metadata to fulltext to produce revision text."""
80 keys = sorted(meta)
81 keys = sorted(meta)
81 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
82 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
82 return b'\x01\n%s\x01\n%s' % (metatext, text)
83 return b'\x01\n%s\x01\n%s' % (metatext, text)
83
84
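A doctest-style round-trip through the b'\x01\n' framing implemented by packmeta() and parsemeta() above:

packed = packmeta({b'copy': b'a.txt', b'copyrev': b'0' * 40}, b'file body\n')
# packed == b'\x01\ncopy: a.txt\ncopyrev: 00...00\n\x01\nfile body\n'
meta, offset = parsemeta(packed)
assert meta == {b'copy': b'a.txt', b'copyrev': b'0' * 40}
assert packed[offset:] == b'file body\n'   # offset points past the header
assert parsemeta(b'no header here') == (None, None)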
84
85
85 def iscensoredtext(text):
86 def iscensoredtext(text):
86 meta = parsemeta(text)[0]
87 meta = parsemeta(text)[0]
87 return meta and b'censored' in meta
88 return meta and b'censored' in meta
88
89
89
90
90 def filtermetadata(text):
91 def filtermetadata(text):
91 """Extract just the revision data from source text.
92 """Extract just the revision data from source text.
92
93
93 Returns ``text`` unless it has a metadata header, in which case we return
94 Returns ``text`` unless it has a metadata header, in which case we return
94 a new buffer without the metadata.
95 a new buffer without the metadata.
95 """
96 """
96 if not text.startswith(b'\x01\n'):
97 if not text.startswith(b'\x01\n'):
97 return text
98 return text
98
99
99 offset = text.index(b'\x01\n', 2)
100 offset = text.index(b'\x01\n', 2)
100 return text[offset + 2 :]
101 return text[offset + 2 :]
101
102
102
103
103 def filerevisioncopied(store, node):
104 def filerevisioncopied(store, node):
104 """Resolve file revision copy metadata.
105 """Resolve file revision copy metadata.
105
106
106 Returns ``False`` if the file has no copy metadata. Otherwise a
107 Returns ``False`` if the file has no copy metadata. Otherwise a
107 2-tuple of the source filename and node.
108 2-tuple of the source filename and node.
108 """
109 """
109 if store.parents(node)[0] != nullid:
110 if store.parents(node)[0] != nullid:
110 return False
111 return False
111
112
112 meta = parsemeta(store.revision(node))[0]
113 meta = parsemeta(store.revision(node))[0]
113
114
114 # copy and copyrev occur in pairs. In rare cases due to old bugs,
115 # copy and copyrev occur in pairs. In rare cases due to old bugs,
115 # one can occur without the other. So ensure both are present to flag
116 # one can occur without the other. So ensure both are present to flag
116 # as a copy.
117 # as a copy.
117 if meta and b'copy' in meta and b'copyrev' in meta:
118 if meta and b'copy' in meta and b'copyrev' in meta:
118 return meta[b'copy'], bin(meta[b'copyrev'])
119 return meta[b'copy'], bin(meta[b'copyrev'])
119
120
120 return False
121 return False
121
122
122
123
123 def filedataequivalent(store, node, filedata):
124 def filedataequivalent(store, node, filedata):
124 """Determines whether file data is equivalent to a stored node.
125 """Determines whether file data is equivalent to a stored node.
125
126
126 Returns True if the passed file data would hash to the same value
127 Returns True if the passed file data would hash to the same value
127 as a stored revision and False otherwise.
128 as a stored revision and False otherwise.
128
129
129 When a stored revision is censored, filedata must be empty to have
130 When a stored revision is censored, filedata must be empty to have
130 equivalence.
131 equivalence.
131
132
132 When a stored revision has copy metadata, it is ignored as part
133 When a stored revision has copy metadata, it is ignored as part
133 of the compare.
134 of the compare.
134 """
135 """
135
136
136 if filedata.startswith(b'\x01\n'):
137 if filedata.startswith(b'\x01\n'):
137 revisiontext = b'\x01\n\x01\n' + filedata
138 revisiontext = b'\x01\n\x01\n' + filedata
138 else:
139 else:
139 revisiontext = filedata
140 revisiontext = filedata
140
141
141 p1, p2 = store.parents(node)
142 p1, p2 = store.parents(node)
142
143
143 computednode = hashrevisionsha1(revisiontext, p1, p2)
144 computednode = hashrevisionsha1(revisiontext, p1, p2)
144
145
145 if computednode == node:
146 if computednode == node:
146 return True
147 return True
147
148
148 # Censored files compare against the empty file.
149 # Censored files compare against the empty file.
149 if store.iscensored(store.rev(node)):
150 if store.iscensored(store.rev(node)):
150 return filedata == b''
151 return filedata == b''
151
152
152 # Renaming a file produces a different hash, even if the data
153 # Renaming a file produces a different hash, even if the data
153 # remains unchanged. Check if that's the case.
154 # remains unchanged. Check if that's the case.
154 if store.renamed(node):
155 if store.renamed(node):
155 return store.read(node) == filedata
156 return store.read(node) == filedata
156
157
157 return False
158 return False
158
159
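The empty-header prepend above exists because file data that itself starts with b'\x01\n' is stored behind an empty metadata header; a sketch of what actually gets hashed:

filedata = b'\x01\nnot really a header\n'   # raw data, no copy metadata
revisiontext = b'\x01\n\x01\n' + filedata   # what the store hashed
# filedataequivalent() therefore compares
# hashrevisionsha1(revisiontext, p1, p2) against the stored node.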
159
160
160 def iterrevs(storelen, start=0, stop=None):
161 def iterrevs(storelen, start=0, stop=None):
161 """Iterate over revision numbers in a store."""
162 """Iterate over revision numbers in a store."""
162 step = 1
163 step = 1
163
164
164 if stop is not None:
165 if stop is not None:
165 if start > stop:
166 if start > stop:
166 step = -1
167 step = -1
167 stop += step
168 stop += step
168 if stop > storelen:
169 if stop > storelen:
169 stop = storelen
170 stop = storelen
170 else:
171 else:
171 stop = storelen
172 stop = storelen
172
173
173 return pycompat.xrange(start, stop, step)
174 return pycompat.xrange(start, stop, step)
174
175
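The clipping behaviour of iterrevs(), for reference; note that ``stop`` ends up inclusive in both directions:

assert list(iterrevs(5)) == [0, 1, 2, 3, 4]
assert list(iterrevs(5, start=2)) == [2, 3, 4]
assert list(iterrevs(5, start=4, stop=1)) == [4, 3, 2, 1]      # backwards
assert list(iterrevs(5, start=0, stop=99)) == [0, 1, 2, 3, 4]  # clamped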
175
176
176 def fileidlookup(store, fileid, identifier):
177 def fileidlookup(store, fileid, identifier):
177 """Resolve the file node for a value.
178 """Resolve the file node for a value.
178
179
179 ``store`` is an object implementing the ``ifileindex`` interface.
180 ``store`` is an object implementing the ``ifileindex`` interface.
180
181
181 ``fileid`` can be:
182 ``fileid`` can be:
182
183
183 * A 20 or 32 byte binary node.
184 * A 20 or 32 byte binary node.
184 * An integer revision number
185 * An integer revision number
185 * A 40 or 64 byte hex node.
186 * A 40 or 64 byte hex node.
186 * A bytes that can be parsed as an integer representing a revision number.
187 * A bytes that can be parsed as an integer representing a revision number.
187
188
188 ``identifier`` is used to populate ``error.LookupError`` with an identifier
189 ``identifier`` is used to populate ``error.LookupError`` with an identifier
189 for the store.
190 for the store.
190
191
191 Raises ``error.LookupError`` on failure.
192 Raises ``error.LookupError`` on failure.
192 """
193 """
193 if isinstance(fileid, int):
194 if isinstance(fileid, int):
194 try:
195 try:
195 return store.node(fileid)
196 return store.node(fileid)
196 except IndexError:
197 except IndexError:
197 raise error.LookupError(
198 raise error.LookupError(
198 b'%d' % fileid, identifier, _(b'no match found')
199 b'%d' % fileid, identifier, _(b'no match found')
199 )
200 )
200
201
201 if len(fileid) in (20, 32):
202 if len(fileid) in (20, 32):
202 try:
203 try:
203 store.rev(fileid)
204 store.rev(fileid)
204 return fileid
205 return fileid
205 except error.LookupError:
206 except error.LookupError:
206 pass
207 pass
207
208
208 if len(fileid) in (40, 64):
209 if len(fileid) in (40, 64):
209 try:
210 try:
210 rawnode = bin(fileid)
211 rawnode = bin(fileid)
211 store.rev(rawnode)
212 store.rev(rawnode)
212 return rawnode
213 return rawnode
213 except TypeError:
214 except TypeError:
214 pass
215 pass
215
216
216 try:
217 try:
217 rev = int(fileid)
218 rev = int(fileid)
218
219
219 if b'%d' % rev != fileid:
220 if b'%d' % rev != fileid:
220 raise ValueError
221 raise ValueError
221
222
222 try:
223 try:
223 return store.node(rev)
224 return store.node(rev)
224 except (IndexError, TypeError):
225 except (IndexError, TypeError):
225 pass
226 pass
226 except (ValueError, OverflowError):
227 except (ValueError, OverflowError):
227 pass
228 pass
228
229
229 raise error.LookupError(fileid, identifier, _(b'no match found'))
230 raise error.LookupError(fileid, identifier, _(b'no match found'))
230
231
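A hedged usage sketch; ``store`` stands in for any ``ifileindex`` implementation (e.g. a filelog) and is not constructed here:

import binascii

node = fileidlookup(store, 0, b'path/to/file')              # integer revnum
assert fileidlookup(store, b'0', b'path/to/file') == node   # revnum as bytes
assert fileidlookup(store, node, b'path/to/file') == node   # binary node
hexnode = binascii.hexlify(node)                            # 40-byte hex form
assert fileidlookup(store, hexnode, b'path/to/file') == node
# anything unresolvable raises error.LookupError tagged with b'path/to/file'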
231
232
232 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
233 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
233 """Resolve information needed to strip revisions.
234 """Resolve information needed to strip revisions.
234
235
235 Finds the minimum revision number that must be stripped in order to
236 Finds the minimum revision number that must be stripped in order to
236 strip ``minlinkrev``.
237 strip ``minlinkrev``.
237
238
238 Returns a 2-tuple of the minimum revision number to do that and a set
239 Returns a 2-tuple of the minimum revision number to do that and a set
239 of all revision numbers that have linkrevs that would be broken
240 of all revision numbers that have linkrevs that would be broken
240 by that strip.
241 by that strip.
241
242
242 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
243 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
243 ``headrevs`` is an iterable of head revisions.
244 ``headrevs`` is an iterable of head revisions.
244 ``linkrevfn`` is a callable that receives a revision and returns a linked
245 ``linkrevfn`` is a callable that receives a revision and returns a linked
245 revision.
246 revision.
246 ``parentrevsfn`` is a callable that receives a revision number and returns
247 ``parentrevsfn`` is a callable that receives a revision number and returns
247 an iterable of its parent revision numbers.
248 an iterable of its parent revision numbers.
248 """
249 """
249 brokenrevs = set()
250 brokenrevs = set()
250 strippoint = tiprev + 1
251 strippoint = tiprev + 1
251
252
252 heads = {}
253 heads = {}
253 futurelargelinkrevs = set()
254 futurelargelinkrevs = set()
254 for head in headrevs:
255 for head in headrevs:
255 headlinkrev = linkrevfn(head)
256 headlinkrev = linkrevfn(head)
256 heads[head] = headlinkrev
257 heads[head] = headlinkrev
257 if headlinkrev >= minlinkrev:
258 if headlinkrev >= minlinkrev:
258 futurelargelinkrevs.add(headlinkrev)
259 futurelargelinkrevs.add(headlinkrev)
259
260
260 # This algorithm involves walking down the rev graph, starting at the
261 # This algorithm involves walking down the rev graph, starting at the
261 # heads. Since the revs are topologically sorted according to linkrev,
262 # heads. Since the revs are topologically sorted according to linkrev,
262 # once all head linkrevs are below the minlink, we know there are
263 # once all head linkrevs are below the minlink, we know there are
263 # no more revs that could have a linkrev greater than minlink.
264 # no more revs that could have a linkrev greater than minlink.
264 # So we can stop walking.
265 # So we can stop walking.
265 while futurelargelinkrevs:
266 while futurelargelinkrevs:
266 strippoint -= 1
267 strippoint -= 1
267 linkrev = heads.pop(strippoint)
268 linkrev = heads.pop(strippoint)
268
269
269 if linkrev < minlinkrev:
270 if linkrev < minlinkrev:
270 brokenrevs.add(strippoint)
271 brokenrevs.add(strippoint)
271 else:
272 else:
272 futurelargelinkrevs.remove(linkrev)
273 futurelargelinkrevs.remove(linkrev)
273
274
274 for p in parentrevsfn(strippoint):
275 for p in parentrevsfn(strippoint):
275 if p != nullrev:
276 if p != nullrev:
276 plinkrev = linkrevfn(p)
277 plinkrev = linkrevfn(p)
277 heads[p] = plinkrev
278 heads[p] = plinkrev
278 if plinkrev >= minlinkrev:
279 if plinkrev >= minlinkrev:
279 futurelargelinkrevs.add(plinkrev)
280 futurelargelinkrevs.add(plinkrev)
280
281
281 return strippoint, brokenrevs
282 return strippoint, brokenrevs
282
283
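A worked example on a three-revision store with two heads hanging off rev 0 (plain dicts stand in for a real store's callables):

linkrevs = {0: 0, 1: 3, 2: 1}
parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}  # -1 is nullrev

strippoint, broken = resolvestripinfo(
    2,                     # minlinkrev: strip changesets with linkrev >= 2
    2,                     # tiprev
    [1, 2],                # headrevs
    linkrevs.__getitem__,  # linkrevfn
    parents.__getitem__,   # parentrevsfn
)
assert (strippoint, broken) == (1, {2})
# rev 1 (linkrev 3) forces stripping from rev 1 onward; rev 2 is stripped
# as collateral even though its linkrev (1) is below minlinkrev, so it is
# reported in the broken set.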
283
284
284 def emitrevisions(
285 def emitrevisions(
285 store,
286 store,
286 nodes,
287 nodes,
287 nodesorder,
288 nodesorder,
288 resultcls,
289 resultcls,
289 deltaparentfn=None,
290 deltaparentfn=None,
290 candeltafn=None,
291 candeltafn=None,
291 rawsizefn=None,
292 rawsizefn=None,
292 revdifffn=None,
293 revdifffn=None,
293 flagsfn=None,
294 flagsfn=None,
294 deltamode=repository.CG_DELTAMODE_STD,
295 deltamode=repository.CG_DELTAMODE_STD,
295 revisiondata=False,
296 revisiondata=False,
296 assumehaveparentrevisions=False,
297 assumehaveparentrevisions=False,
298 sidedata_helpers=None,
297 ):
299 ):
298 """Generic implementation of ifiledata.emitrevisions().
300 """Generic implementation of ifiledata.emitrevisions().
299
301
300 Emitting revision data is subtly complex. This function attempts to
302 Emitting revision data is subtly complex. This function attempts to
301 encapsulate all the logic for doing so in a backend-agnostic way.
303 encapsulate all the logic for doing so in a backend-agnostic way.
302
304
303 ``store``
305 ``store``
304 Object conforming to ``ifilestorage`` interface.
306 Object conforming to ``ifilestorage`` interface.
305
307
306 ``nodes``
308 ``nodes``
307 List of revision nodes whose data to emit.
309 List of revision nodes whose data to emit.
308
310
309 ``resultcls``
311 ``resultcls``
310 A type implementing the ``irevisiondelta`` interface that will be
312 A type implementing the ``irevisiondelta`` interface that will be
311 constructed and returned.
313 constructed and returned.
312
314
313 ``deltaparentfn`` (optional)
315 ``deltaparentfn`` (optional)
314 Callable receiving a revision number and returning the revision number
316 Callable receiving a revision number and returning the revision number
315 of a revision that the internal delta is stored against. This delta
317 of a revision that the internal delta is stored against. This delta
316 will be preferred over computing a new arbitrary delta.
318 will be preferred over computing a new arbitrary delta.
317
319
318 If not defined, a delta will always be computed from raw revision
320 If not defined, a delta will always be computed from raw revision
319 data.
321 data.
320
322
321 ``candeltafn`` (optional)
323 ``candeltafn`` (optional)
322 Callable receiving a pair of revision numbers that returns a bool
324 Callable receiving a pair of revision numbers that returns a bool
323 indicating whether a delta between them can be produced.
325 indicating whether a delta between them can be produced.
324
326
325 If not defined, it is assumed that any two revisions can delta with
327 If not defined, it is assumed that any two revisions can delta with
326 each other.
328 each other.
327
329
328 ``rawsizefn`` (optional)
330 ``rawsizefn`` (optional)
329 Callable receiving a revision number and returning the length of the
331 Callable receiving a revision number and returning the length of the
330 ``store.rawdata(rev)``.
332 ``store.rawdata(rev)``.
331
333
332 If not defined, ``len(store.rawdata(rev))`` will be called.
334 If not defined, ``len(store.rawdata(rev))`` will be called.
333
335
334 ``revdifffn`` (optional)
336 ``revdifffn`` (optional)
335 Callable receiving a pair of revision numbers that returns a delta
337 Callable receiving a pair of revision numbers that returns a delta
336 between them.
338 between them.
337
339
338 If not defined, a delta will be computed by invoking mdiff code
340 If not defined, a delta will be computed by invoking mdiff code
339 on ``store.revision()`` results.
341 on ``store.revision()`` results.
340
342
341 Defining this function allows a precomputed or stored delta to be
343 Defining this function allows a precomputed or stored delta to be
342 used without having to compute one.
344 used without having to compute one.
343
345
344 ``flagsfn`` (optional)
346 ``flagsfn`` (optional)
345 Callable receiving a revision number and returns the integer flags
347 Callable receiving a revision number and returns the integer flags
346 value for it. If not defined, flags value will be 0.
348 value for it. If not defined, flags value will be 0.
347
349
348 ``deltamode``
350 ``deltamode``
349 constraint on delta to be sent:
351 constraint on delta to be sent:
350 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
352 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
351 * CG_DELTAMODE_PREV - only delta against "prev",
353 * CG_DELTAMODE_PREV - only delta against "prev",
352 * CG_DELTAMODE_FULL - only issue full snapshot.
354 * CG_DELTAMODE_FULL - only issue full snapshot.
353
355
354 Whether to send fulltext revisions instead of deltas, if allowed.
356 Whether to send fulltext revisions instead of deltas, if allowed.
355
357
356 ``nodesorder``
358 ``nodesorder``
357 ``revisiondata``
359 ``revisiondata``
358 ``assumehaveparentrevisions``
360 ``assumehaveparentrevisions``
361 ``sidedata_helpers`` (optional)
362 If not None, means that sidedata should be included.
363 A tuple of `(repo, computers, removers)`, where the last two map revlog types to lists:
364 * `repo` is used as an argument for computers
365 * `computers` is a list of `(category, (keys, computer))` tuples that
366 compute the missing sidedata categories that were requested:
367 * `category` is the sidedata category
368 * `keys` are the sidedata keys to be affected
369 * `computer` is the function `(repo, store, rev, sidedata)` that
370 returns a new sidedata dict.
371 * `removers` will remove the keys corresponding to the categories
372 that are present, but not needed.
373 If both `computers` and `removers` are empty, sidedata are simply not
374 transformed.
375 Revlog types are `changelog`, `manifest` or `filelog`.
359 """
376 """
360
377
361 fnode = store.node
378 fnode = store.node
362 frev = store.rev
379 frev = store.rev
363
380
364 if nodesorder == b'nodes':
381 if nodesorder == b'nodes':
365 revs = [frev(n) for n in nodes]
382 revs = [frev(n) for n in nodes]
366 elif nodesorder == b'linear':
383 elif nodesorder == b'linear':
367 revs = {frev(n) for n in nodes}
384 revs = {frev(n) for n in nodes}
368 revs = dagop.linearize(revs, store.parentrevs)
385 revs = dagop.linearize(revs, store.parentrevs)
369 else: # storage and default
386 else: # storage and default
370 revs = sorted(frev(n) for n in nodes)
387 revs = sorted(frev(n) for n in nodes)
371
388
372 prevrev = None
389 prevrev = None
373
390
374 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
391 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
375 prevrev = store.parentrevs(revs[0])[0]
392 prevrev = store.parentrevs(revs[0])[0]
376
393
377 # Set of revs available to delta against.
394 # Set of revs available to delta against.
378 available = set()
395 available = set()
379
396
380 for rev in revs:
397 for rev in revs:
381 if rev == nullrev:
398 if rev == nullrev:
382 continue
399 continue
383
400
384 node = fnode(rev)
401 node = fnode(rev)
385 p1rev, p2rev = store.parentrevs(rev)
402 p1rev, p2rev = store.parentrevs(rev)
386
403
387 if deltaparentfn:
404 if deltaparentfn:
388 deltaparentrev = deltaparentfn(rev)
405 deltaparentrev = deltaparentfn(rev)
389 else:
406 else:
390 deltaparentrev = nullrev
407 deltaparentrev = nullrev
391
408
392 # Forced delta against previous mode.
409 # Forced delta against previous mode.
393 if deltamode == repository.CG_DELTAMODE_PREV:
410 if deltamode == repository.CG_DELTAMODE_PREV:
394 baserev = prevrev
411 baserev = prevrev
395
412
396 # We're instructed to send fulltext. Honor that.
413 # We're instructed to send fulltext. Honor that.
397 elif deltamode == repository.CG_DELTAMODE_FULL:
414 elif deltamode == repository.CG_DELTAMODE_FULL:
398 baserev = nullrev
415 baserev = nullrev
399 # We're instructed to use p1. Honor that
416 # We're instructed to use p1. Honor that
400 elif deltamode == repository.CG_DELTAMODE_P1:
417 elif deltamode == repository.CG_DELTAMODE_P1:
401 baserev = p1rev
418 baserev = p1rev
402
419
403 # There is a delta in storage. We try to use that because it
420 # There is a delta in storage. We try to use that because it
404 # amounts to effectively copying data from storage and is
421 # amounts to effectively copying data from storage and is
405 # therefore the fastest.
422 # therefore the fastest.
406 elif deltaparentrev != nullrev:
423 elif deltaparentrev != nullrev:
407 # Base revision was already emitted in this group. We can
424 # Base revision was already emitted in this group. We can
408 # always safely use the delta.
425 # always safely use the delta.
409 if deltaparentrev in available:
426 if deltaparentrev in available:
410 baserev = deltaparentrev
427 baserev = deltaparentrev
411
428
412 # Base revision is a parent that hasn't been emitted already.
429 # Base revision is a parent that hasn't been emitted already.
413 # Use it if we can assume the receiver has the parent revision.
430 # Use it if we can assume the receiver has the parent revision.
414 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
431 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
415 baserev = deltaparentrev
432 baserev = deltaparentrev
416
433
417 # No guarantee the receiver has the delta parent. Send delta
434 # No guarantee the receiver has the delta parent. Send delta
418 # against last revision (if possible), which in the common case
435 # against last revision (if possible), which in the common case
419 # should be similar enough to this revision that the delta is
436 # should be similar enough to this revision that the delta is
420 # reasonable.
437 # reasonable.
421 elif prevrev is not None:
438 elif prevrev is not None:
422 baserev = prevrev
439 baserev = prevrev
423 else:
440 else:
424 baserev = nullrev
441 baserev = nullrev
425
442
426 # Storage has a fulltext revision.
443 # Storage has a fulltext revision.
427
444
428 # Let's use the previous revision, which is as good a guess as any.
445 # Let's use the previous revision, which is as good a guess as any.
429 # There is definitely room to improve this logic.
446 # There is definitely room to improve this logic.
430 elif prevrev is not None:
447 elif prevrev is not None:
431 baserev = prevrev
448 baserev = prevrev
432 else:
449 else:
433 baserev = nullrev
450 baserev = nullrev
434
451
435 # But we can't actually use our chosen delta base for whatever
452 # But we can't actually use our chosen delta base for whatever
436 # reason. Reset to fulltext.
453 # reason. Reset to fulltext.
437 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
454 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
438 baserev = nullrev
455 baserev = nullrev
439
456
440 revision = None
457 revision = None
441 delta = None
458 delta = None
442 baserevisionsize = None
459 baserevisionsize = None
443
460
444 if revisiondata:
461 if revisiondata:
445 if store.iscensored(baserev) or store.iscensored(rev):
462 if store.iscensored(baserev) or store.iscensored(rev):
446 try:
463 try:
447 revision = store.rawdata(node)
464 revision = store.rawdata(node)
448 except error.CensoredNodeError as e:
465 except error.CensoredNodeError as e:
449 revision = e.tombstone
466 revision = e.tombstone
450
467
451 if baserev != nullrev:
468 if baserev != nullrev:
452 if rawsizefn:
469 if rawsizefn:
453 baserevisionsize = rawsizefn(baserev)
470 baserevisionsize = rawsizefn(baserev)
454 else:
471 else:
455 baserevisionsize = len(store.rawdata(baserev))
472 baserevisionsize = len(store.rawdata(baserev))
456
473
457 elif (
474 elif (
458 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
475 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
459 ):
476 ):
460 revision = store.rawdata(node)
477 revision = store.rawdata(node)
461 available.add(rev)
478 available.add(rev)
462 else:
479 else:
463 if revdifffn:
480 if revdifffn:
464 delta = revdifffn(baserev, rev)
481 delta = revdifffn(baserev, rev)
465 else:
482 else:
466 delta = mdiff.textdiff(
483 delta = mdiff.textdiff(
467 store.rawdata(baserev), store.rawdata(rev)
484 store.rawdata(baserev), store.rawdata(rev)
468 )
485 )
469
486
470 available.add(rev)
487 available.add(rev)
471
488
489 sidedata = None
490 if sidedata_helpers:
491 sidedata = store.sidedata(rev)
492 sidedata = run_sidedata_helpers(
493 store=store,
494 sidedata_helpers=sidedata_helpers,
495 sidedata=sidedata,
496 rev=rev,
497 )
498 sidedata = sidedatamod.serialize_sidedata(sidedata)
499
472 yield resultcls(
500 yield resultcls(
473 node=node,
501 node=node,
474 p1node=fnode(p1rev),
502 p1node=fnode(p1rev),
475 p2node=fnode(p2rev),
503 p2node=fnode(p2rev),
476 basenode=fnode(baserev),
504 basenode=fnode(baserev),
477 flags=flagsfn(rev) if flagsfn else 0,
505 flags=flagsfn(rev) if flagsfn else 0,
478 baserevisionsize=baserevisionsize,
506 baserevisionsize=baserevisionsize,
479 revision=revision,
507 revision=revision,
480 delta=delta,
508 delta=delta,
481 sidedata=sidedata,
509 sidedata=sidedata,
482 )
510 )
483
511
484 prevrev = rev
512 prevrev = rev
485
513
486
514
515 def run_sidedata_helpers(store, sidedata_helpers, sidedata, rev):
516 """Returns the sidedata for the given revision after running through
517 the given helpers.
518 - `store`: the revlog this applies to (changelog, manifest, or filelog
519 instance)
520 - `sidedata_helpers`: see `storageutil.emitrevisions`
521 - `sidedata`: previous sidedata at the given rev, if any
522 - `rev`: affected rev of `store`
523 """
524 repo, sd_computers, sd_removers = sidedata_helpers
525 kind = store.revlog_kind
526 for _keys, sd_computer in sd_computers.get(kind, []):
527 sidedata = sd_computer(repo, store, rev, sidedata)
528 for keys, _computer in sd_removers.get(kind, []):
529 for key in keys:
530 sidedata.pop(key, None)
531 return sidedata
532
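A hedged sketch of assembling ``sidedata_helpers`` for the structure unpacked above; the category key and computer are hypothetical, and ``repo`` is assumed to be in scope:

EXAMPLE_CATEGORY = b'example-category'  # hypothetical sidedata key

def compute_example(repo, store, rev, sidedata):
    # a computer must return the full, updated sidedata dict for `rev`
    sidedata[EXAMPLE_CATEGORY] = b'payload for rev %d' % rev
    return sidedata

computers = {b'changelog': [((EXAMPLE_CATEGORY,), compute_example)]}
removers = {b'filelog': [((EXAMPLE_CATEGORY,), None)]}  # keys to drop
sidedata_helpers = (repo, computers, removers)

# run_sidedata_helpers() dispatches on store.revlog_kind:
#   new_sd = run_sidedata_helpers(store, sidedata_helpers, sidedata, rev)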
533
487 def deltaiscensored(delta, baserev, baselenfn):
534 def deltaiscensored(delta, baserev, baselenfn):
488 """Determine if a delta represents censored revision data.
535 """Determine if a delta represents censored revision data.
489
536
490 ``baserev`` is the base revision this delta is encoded against.
537 ``baserev`` is the base revision this delta is encoded against.
491 ``baselenfn`` is a callable receiving a revision number that resolves the
538 ``baselenfn`` is a callable receiving a revision number that resolves the
492 length of the revision fulltext.
539 length of the revision fulltext.
493
540
494 Returns a bool indicating if the result of the delta represents a censored
541 Returns a bool indicating if the result of the delta represents a censored
495 revision.
542 revision.
496 """
543 """
497 # Fragile heuristic: unless new file meta keys are added alphabetically
544 # Fragile heuristic: unless new file meta keys are added alphabetically
498 # preceding "censored", all censored revisions are prefixed by
545 # preceding "censored", all censored revisions are prefixed by
499 # "\1\ncensored:". A delta producing such a censored revision must be a
546 # "\1\ncensored:". A delta producing such a censored revision must be a
500 # full-replacement delta, so we inspect the first and only patch in the
547 # full-replacement delta, so we inspect the first and only patch in the
501 # delta for this prefix.
548 # delta for this prefix.
502 hlen = struct.calcsize(b">lll")
549 hlen = struct.calcsize(b">lll")
503 if len(delta) <= hlen:
550 if len(delta) <= hlen:
504 return False
551 return False
505
552
506 oldlen = baselenfn(baserev)
553 oldlen = baselenfn(baserev)
507 newlen = len(delta) - hlen
554 newlen = len(delta) - hlen
508 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
555 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
509 return False
556 return False
510
557
511 add = b"\1\ncensored:"
558 add = b"\1\ncensored:"
512 addlen = len(add)
559 addlen = len(add)
513 return newlen >= addlen and delta[hlen : hlen + addlen] == add
560 return newlen >= addlen and delta[hlen : hlen + addlen] == add
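An illustration of the heuristic, assuming mdiff.replacediffheader(oldlen, newlen) packs ``(0, oldlen, newlen)`` as ``>lll`` (one hunk replacing the whole base text):

import struct

tombstone = b'\x01\ncensored: reason\n\x01\n'  # as built by packmeta()
oldlen = 100                                   # fulltext length of baserev
delta = struct.pack(b'>lll', 0, oldlen, len(tombstone)) + tombstone

hlen = struct.calcsize(b'>lll')
assert len(delta) - hlen == len(tombstone)
assert delta[hlen:hlen + 11] == b'\x01\ncensored:'
# deltaiscensored(delta, baserev, lambda rev: 100) would return True here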
@@ -1,738 +1,740 b''
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # To use this with the test suite:
8 # To use this with the test suite:
9 #
9 #
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12
12
13 from __future__ import absolute_import
13 from __future__ import absolute_import
14
14
15 import stat
15 import stat
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial.node import (
18 from mercurial.node import (
19 bin,
19 bin,
20 hex,
20 hex,
21 nullid,
21 nullid,
22 nullrev,
22 nullrev,
23 )
23 )
24 from mercurial.thirdparty import attr
24 from mercurial.thirdparty import attr
25 from mercurial import (
25 from mercurial import (
26 ancestor,
26 ancestor,
27 bundlerepo,
27 bundlerepo,
28 error,
28 error,
29 extensions,
29 extensions,
30 localrepo,
30 localrepo,
31 mdiff,
31 mdiff,
32 pycompat,
32 pycompat,
33 revlog,
33 revlog,
34 store,
34 store,
35 verify,
35 verify,
36 )
36 )
37 from mercurial.interfaces import (
37 from mercurial.interfaces import (
38 repository,
38 repository,
39 util as interfaceutil,
39 util as interfaceutil,
40 )
40 )
41 from mercurial.utils import (
41 from mercurial.utils import (
42 cborutil,
42 cborutil,
43 storageutil,
43 storageutil,
44 )
44 )
45 from mercurial.revlogutils import flagutil
45 from mercurial.revlogutils import flagutil
46
46
47 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
47 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
48 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
48 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
49 # be specifying the version(s) of Mercurial they are tested with, or
49 # be specifying the version(s) of Mercurial they are tested with, or
50 # leave the attribute unspecified.
50 # leave the attribute unspecified.
51 testedwith = b'ships-with-hg-core'
51 testedwith = b'ships-with-hg-core'
52
52
53 REQUIREMENT = b'testonly-simplestore'
53 REQUIREMENT = b'testonly-simplestore'
54
54
55
55
56 def validatenode(node):
56 def validatenode(node):
57 if isinstance(node, int):
57 if isinstance(node, int):
58 raise ValueError('expected node; got int')
58 raise ValueError('expected node; got int')
59
59
60 if len(node) != 20:
60 if len(node) != 20:
61 raise ValueError('expected 20 byte node')
61 raise ValueError('expected 20 byte node')
62
62
63
63
64 def validaterev(rev):
64 def validaterev(rev):
65 if not isinstance(rev, int):
65 if not isinstance(rev, int):
66 raise ValueError('expected int')
66 raise ValueError('expected int')
67
67
68
68
69 class simplestoreerror(error.StorageError):
69 class simplestoreerror(error.StorageError):
70 pass
70 pass
71
71
72
72
73 @interfaceutil.implementer(repository.irevisiondelta)
73 @interfaceutil.implementer(repository.irevisiondelta)
74 @attr.s(slots=True)
74 @attr.s(slots=True)
75 class simplestorerevisiondelta(object):
75 class simplestorerevisiondelta(object):
76 node = attr.ib()
76 node = attr.ib()
77 p1node = attr.ib()
77 p1node = attr.ib()
78 p2node = attr.ib()
78 p2node = attr.ib()
79 basenode = attr.ib()
79 basenode = attr.ib()
80 flags = attr.ib()
80 flags = attr.ib()
81 baserevisionsize = attr.ib()
81 baserevisionsize = attr.ib()
82 revision = attr.ib()
82 revision = attr.ib()
83 delta = attr.ib()
83 delta = attr.ib()
84 linknode = attr.ib(default=None)
84 linknode = attr.ib(default=None)
85
85
86
86
87 @interfaceutil.implementer(repository.iverifyproblem)
87 @interfaceutil.implementer(repository.iverifyproblem)
88 @attr.s(frozen=True)
88 @attr.s(frozen=True)
89 class simplefilestoreproblem(object):
89 class simplefilestoreproblem(object):
90 warning = attr.ib(default=None)
90 warning = attr.ib(default=None)
91 error = attr.ib(default=None)
91 error = attr.ib(default=None)
92 node = attr.ib(default=None)
92 node = attr.ib(default=None)
93
93
94
94
95 @interfaceutil.implementer(repository.ifilestorage)
95 @interfaceutil.implementer(repository.ifilestorage)
96 class filestorage(object):
96 class filestorage(object):
97 """Implements storage for a tracked path.
97 """Implements storage for a tracked path.
98
98
99 Data is stored in the VFS in a directory corresponding to the tracked
99 Data is stored in the VFS in a directory corresponding to the tracked
100 path.
100 path.
101
101
102 Index data is stored in an ``index`` file using CBOR.
102 Index data is stored in an ``index`` file using CBOR.
103
103
104 Fulltext data is stored in files having names of the node.
104 Fulltext data is stored in files having names of the node.
105 """
105 """
106
106
107 _flagserrorclass = simplestoreerror
107 _flagserrorclass = simplestoreerror
108
108
109 def __init__(self, svfs, path):
109 def __init__(self, svfs, path):
110 self._svfs = svfs
110 self._svfs = svfs
111 self._path = path
111 self._path = path
112
112
113 self._storepath = b'/'.join([b'data', path])
113 self._storepath = b'/'.join([b'data', path])
114 self._indexpath = b'/'.join([self._storepath, b'index'])
114 self._indexpath = b'/'.join([self._storepath, b'index'])
115
115
116 indexdata = self._svfs.tryread(self._indexpath)
116 indexdata = self._svfs.tryread(self._indexpath)
117 if indexdata:
117 if indexdata:
118 indexdata = cborutil.decodeall(indexdata)
118 indexdata = cborutil.decodeall(indexdata)
119
119
120 self._indexdata = indexdata or []
120 self._indexdata = indexdata or []
121 self._indexbynode = {}
121 self._indexbynode = {}
122 self._indexbyrev = {}
122 self._indexbyrev = {}
123 self._index = []
123 self._index = []
124 self._refreshindex()
124 self._refreshindex()
125
125
126 self._flagprocessors = dict(flagutil.flagprocessors)
126 self._flagprocessors = dict(flagutil.flagprocessors)
127
127
128 def _refreshindex(self):
128 def _refreshindex(self):
129 self._indexbynode.clear()
129 self._indexbynode.clear()
130 self._indexbyrev.clear()
130 self._indexbyrev.clear()
131 self._index = []
131 self._index = []
132
132
133 for i, entry in enumerate(self._indexdata):
133 for i, entry in enumerate(self._indexdata):
134 self._indexbynode[entry[b'node']] = entry
134 self._indexbynode[entry[b'node']] = entry
135 self._indexbyrev[i] = entry
135 self._indexbyrev[i] = entry
136
136
137 self._indexbynode[nullid] = {
137 self._indexbynode[nullid] = {
138 b'node': nullid,
138 b'node': nullid,
139 b'p1': nullid,
139 b'p1': nullid,
140 b'p2': nullid,
140 b'p2': nullid,
141 b'linkrev': nullrev,
141 b'linkrev': nullrev,
142 b'flags': 0,
142 b'flags': 0,
143 }
143 }
144
144
145 self._indexbyrev[nullrev] = {
145 self._indexbyrev[nullrev] = {
146 b'node': nullid,
146 b'node': nullid,
147 b'p1': nullid,
147 b'p1': nullid,
148 b'p2': nullid,
148 b'p2': nullid,
149 b'linkrev': nullrev,
149 b'linkrev': nullrev,
150 b'flags': 0,
150 b'flags': 0,
151 }
151 }
152
152
153 for i, entry in enumerate(self._indexdata):
153 for i, entry in enumerate(self._indexdata):
154 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
154 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
155
155
156 # start, length, rawsize, chainbase, linkrev, p1, p2, node
156 # start, length, rawsize, chainbase, linkrev, p1, p2, node
157 self._index.append(
157 self._index.append(
158 (0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, entry[b'node'])
158 (0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, entry[b'node'])
159 )
159 )
160
160
161 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
161 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
162
162
163 def __len__(self):
163 def __len__(self):
164 return len(self._indexdata)
164 return len(self._indexdata)
165
165
166 def __iter__(self):
166 def __iter__(self):
167 return iter(range(len(self)))
167 return iter(range(len(self)))
168
168
169 def revs(self, start=0, stop=None):
169 def revs(self, start=0, stop=None):
170 step = 1
170 step = 1
171 if stop is not None:
171 if stop is not None:
172 if start > stop:
172 if start > stop:
173 step = -1
173 step = -1
174
174
175 stop += step
175 stop += step
176 else:
176 else:
177 stop = len(self)
177 stop = len(self)
178
178
179 return range(start, stop, step)
179 return range(start, stop, step)
180
180
181 def parents(self, node):
181 def parents(self, node):
182 validatenode(node)
182 validatenode(node)
183
183
184 if node not in self._indexbynode:
184 if node not in self._indexbynode:
185 raise KeyError('unknown node')
185 raise KeyError('unknown node')
186
186
187 entry = self._indexbynode[node]
187 entry = self._indexbynode[node]
188
188
189 return entry[b'p1'], entry[b'p2']
189 return entry[b'p1'], entry[b'p2']
190
190
191 def parentrevs(self, rev):
191 def parentrevs(self, rev):
192 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
192 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
193 return self.rev(p1), self.rev(p2)
193 return self.rev(p1), self.rev(p2)
194
194
195 def rev(self, node):
195 def rev(self, node):
196 validatenode(node)
196 validatenode(node)
197
197
198 try:
198 try:
199 self._indexbynode[node]
199 self._indexbynode[node]
200 except KeyError:
200 except KeyError:
201 raise error.LookupError(node, self._indexpath, _('no node'))
201 raise error.LookupError(node, self._indexpath, _('no node'))
202
202
203 for rev, entry in self._indexbyrev.items():
203 for rev, entry in self._indexbyrev.items():
204 if entry[b'node'] == node:
204 if entry[b'node'] == node:
205 return rev
205 return rev
206
206
207 raise error.ProgrammingError(b'this should not occur')
207 raise error.ProgrammingError(b'this should not occur')
208
208
209 def node(self, rev):
209 def node(self, rev):
210 validaterev(rev)
210 validaterev(rev)
211
211
212 return self._indexbyrev[rev][b'node']
212 return self._indexbyrev[rev][b'node']
213
213
214 def hasnode(self, node):
214 def hasnode(self, node):
215 validatenode(node)
215 validatenode(node)
216 return node in self._indexbynode
216 return node in self._indexbynode
217
217
218 def censorrevision(self, tr, censornode, tombstone=b''):
218 def censorrevision(self, tr, censornode, tombstone=b''):
219 raise NotImplementedError('TODO')
219 raise NotImplementedError('TODO')
220
220
    def lookup(self, node):
        if isinstance(node, int):
            return self.node(node)

        if len(node) == 20:
            self.rev(node)
            return node

        try:
            rev = int(node)
            if '%d' % rev != node:
                raise ValueError

            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError

            return self.node(rev)
        except (ValueError, OverflowError):
            pass

        if len(node) == 40:
            try:
                rawnode = bin(node)
                self.rev(rawnode)
                return rawnode
            except TypeError:
                pass

        raise error.LookupError(node, self._path, _('invalid lookup input'))

    def linkrev(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'linkrev']

    def _flags(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'flags']

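    # A delta may only be computed against a base whose raw text is stable.
    # Revisions carrying flags in REVIDX_RAWTEXT_CHANGING_FLAGS (censored
    # revisions, for example) transform their raw text, so a delta from or
    # against them would not reproduce the right bytes.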
    def _candelta(self, baserev, rev):
        validaterev(baserev)
        validaterev(rev)

        if (self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False

        return True

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != storageutil.hashrevisionsha1(text, p1, p2):
            raise simplestoreerror(
                _("integrity check failed on %s") % self._path
            )

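    # revision() is the main read path: the fulltext of every node lives in
    # its own store file named after the hex node, and flag processors are
    # applied on the way out unless raw=True is requested.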
    def revision(self, nodeorrev, raw=False):
        if isinstance(nodeorrev, int):
            node = self.node(nodeorrev)
        else:
            node = nodeorrev
        validatenode(node)

        if node == nullid:
            return b''

        rev = self.rev(node)
        flags = self._flags(rev)

        path = b'/'.join([self._storepath, hex(node)])
        rawtext = self._svfs.read(path)

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def rawdata(self, nodeorrev):
        # Pass the revision identifier through; the original call omitted
        # it, which would raise TypeError on every invocation.
        return self.revision(nodeorrev, raw=True)

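    # Filelog texts may carry copy metadata between two b'\1\n' markers at
    # the start of the revision; read() returns the text with that header
    # stripped. For example (hypothetical values), a stored fulltext of
    #
    #   b'\1\ncopy: foo\ncopyrev: <hex>\n\1\nactual content'
    #
    # reads back as b'actual content'.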
    def read(self, node):
        validatenode(node)

        revision = self.revision(node)

        if not revision.startswith(b'\1\n'):
            return revision

        start = revision.index(b'\1\n', 2)
        return revision[start + 2 :]

    def renamed(self, node):
        validatenode(node)

        if self.parents(node)[0] != nullid:
            return False

        fulltext = self.revision(node)
        m = storageutil.parsemeta(fulltext)[0]

        if m and 'copy' in m:
            return m['copy'], bin(m['copyrev'])

        return False

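    # cmp() returns True if the stored revision differs from ``text``. The
    # fast path hashes the candidate text against the stored parents: a
    # plain text that happens to start with b'\1\n' is first escaped with
    # an empty metadata header (b'\1\n\1\n'), mirroring how add() would
    # have stored it.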
    def cmp(self, node, text):
        validatenode(node)

        t = text

        if text.startswith(b'\1\n'):
            t = b'\1\n\1\n' + text

        p1, p2 = self.parents(node)

        if storageutil.hashrevisionsha1(t, p1, p2) == node:
            return False

        if self.iscensored(self.rev(node)):
            return text != b''

        if self.renamed(node):
            t2 = self.read(node)
            return t2 != text

        return True

    def size(self, rev):
        validaterev(rev)

        node = self._indexbyrev[rev][b'node']

        if self.renamed(node):
            return len(self.read(node))

        if self.iscensored(rev):
            return 0

        return len(self.revision(node))

    def iscensored(self, rev):
        validaterev(rev)

        return self._flags(rev) & repository.REVISION_FLAG_CENSORED

    def commonancestorsheads(self, a, b):
        validatenode(a)
        validatenode(b)

        a = self.rev(a)
        b = self.rev(b)

        ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancestors)

    def descendants(self, revs):
        # This is a copy of revlog.descendants()
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    # Required by verify.
    def files(self):
        entries = self._svfs.listdir(self._storepath)

        # Strip out undo.backup.* files created as part of transaction
        # recording.
        entries = [f for f in entries if not f.startswith('undo.backup.')]

        return [b'/'.join((self._storepath, f)) for f in entries]

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        # TODO do a real implementation of this
        return {
            'exclusivefiles': [],
            'sharedfiles': [],
            'revisionscount': len(self),
            'trackedsize': 0,
            'storedsize': None,
        }

    def verifyintegrity(self, state):
        state['skipread'] = set()
        for rev in self:
            node = self.node(rev)
            try:
                self.revision(node)
            except Exception as e:
                yield simplefilestoreproblem(
                    error='unpacking %s: %s' % (node, e), node=node
                )
                state['skipread'].add(node)

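    # emitrevisions() mostly delegates to storageutil.emitrevisions(). The
    # new sidedata_helpers argument is forwarded untouched, so changegroup
    # generation can request sidedata processing without this store having
    # to know anything about it.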
    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        # TODO this will probably break on some ordering options.
        nodes = [n for n in nodes if n != nullid]
        if not nodes:
            return
        for delta in storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            simplestorerevisiondelta,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
        ):
            yield delta

    def add(self, text, meta, transaction, linkrev, p1, p2):
        if meta or text.startswith(b'\1\n'):
            text = storageutil.packmeta(meta, text)

        return self.addrevision(text, transaction, linkrev, p1, p2)

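    # addrevision() runs write-time flag processors, computes the node when
    # the caller does not supply one, and is a no-op for nodes already in
    # the index (the existing node is returned unchanged).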
    def addrevision(
        self,
        text,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=revlog.REVIDX_DEFAULT_FLAGS,
        cachedelta=None,
    ):
        validatenode(p1)
        validatenode(p2)

        if flags:
            node = node or storageutil.hashrevisionsha1(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        node = node or storageutil.hashrevisionsha1(text, p1, p2)

        if node in self._indexbynode:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self._addrawrevision(
            node, rawtext, transaction, linkrev, p1, p2, flags
        )

    def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
        transaction.addbackup(self._indexpath)

        path = b'/'.join([self._storepath, hex(node)])

        self._svfs.write(path, rawtext)

        self._indexdata.append(
            {
                b'node': node,
                b'p1': p1,
                b'p2': p2,
                b'linkrev': link,
                b'flags': flags,
            }
        )

        self._reflectindexupdate()

        return node

    def _reflectindexupdate(self):
        self._refreshindex()
        # cborutil.streamencode() yields bytes chunks, so join them with a
        # bytes separator.
        self._svfs.write(
            self._indexpath, b''.join(cborutil.streamencode(self._indexdata))
        )

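    # addgroup() applies incoming deltas. Since this store only keeps
    # fulltexts, every delta is materialized immediately: a delta against
    # nullid patches the empty string, anything else patches the resolved
    # fulltext of its delta base. The consumed tuples have the shape
    # (node, p1, p2, linknode, deltabase, delta, flags).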
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
    ):
        if maybemissingparents:
            raise error.Abort(
                _('simple store does not support missing parents write mode')
            )

        empty = True

        transaction.addbackup(self._indexpath)

        for node, p1, p2, linknode, deltabase, delta, flags in deltas:
            linkrev = linkmapper(linknode)
            flags = flags or revlog.REVIDX_DEFAULT_FLAGS

            if node in self._indexbynode:
                if duplicaterevisioncb:
                    duplicaterevisioncb(self, self.rev(node))
                empty = False
                continue

            # Need to resolve the fulltext from the delta base.
            if deltabase == nullid:
                text = mdiff.patch(b'', delta)
            else:
                text = mdiff.patch(self.revision(deltabase), delta)

            rev = self._addrawrevision(
                node, text, transaction, linkrev, p1, p2, flags
            )

            if addrevisioncb:
                addrevisioncb(self, rev)
            empty = False
        return not empty

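    # Heads are computed by elimination: assume every revision is a head,
    # then clear the flag on any revision that appears as a parent of
    # another.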
    def _headrevs(self):
        # Assume all revisions are heads by default.
        revishead = {rev: True for rev in self._indexbyrev}

        for rev, entry in self._indexbyrev.items():
            # Unset head flag for all seen parents.
            revishead[self.rev(entry[b'p1'])] = False
            revishead[self.rev(entry[b'p2'])] = False

        return [rev for rev, ishead in sorted(revishead.items()) if ishead]

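    # When start/stop are given, the computation switches to a forward
    # reachability walk from ``start``: a revision becomes reachable (and
    # provisionally a head) once one of its parents is reachable, at which
    # point that parent stops being a head, unless either side is in
    # ``stop``.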
    def heads(self, start=None, stop=None):
        # This is copied from revlog.py.
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self._headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = {self.rev(n) for n in stop}
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        validatenode(node)

        # This is a copy of revlog.children().
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def getstrippoint(self, minlink):
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self._headrevs(),
            self.linkrev,
            self.parentrevs,
        )

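    # strip() relies on revisions being append-only: everything from the
    # computed strip point onward is dropped by truncating the in-memory
    # index and rewriting it. The per-node fulltext files are left behind;
    # verify tolerates the orphans via warnorphanstorefiles below.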
    def strip(self, minlink, transaction):
        if not len(self):
            return

        rev, _ignored = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # Purge index data starting at the requested revision.
        self._indexdata[rev:] = []
        self._reflectindexupdate()


def issimplestorefile(f, kind, st):
    if kind != stat.S_IFREG:
        return False

    if store.isrevlog(f, kind, st):
        return False

    # Ignore transaction undo files.
    if f.startswith('undo.'):
        return False

    # Otherwise assume it belongs to the simple store.
    return True


class simplestore(store.encodedstore):
    def datafiles(self):
        for x in super(simplestore, self).datafiles():
            yield x

        # Supplement with non-revlog files.
        extrafiles = self._walk('data', True, filefilter=issimplestorefile)

        for unencoded, encoded, size in extrafiles:
            try:
                unencoded = store.decodefilename(unencoded)
            except KeyError:
                unencoded = None

            yield unencoded, encoded, size


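# reposetup() swaps in a repo subclass whose file() returns our storage
# object, which is how filelog reads and writes get routed through the
# simple store. A hedged sketch of enabling it (assuming this file is
# available on disk as ``simplestorerepo.py``) would be an hgrc entry like:
#
#   [extensions]
#   simplestorerepo = /path/to/simplestorerepo.py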
def reposetup(ui, repo):
    if not repo.local():
        return

    if isinstance(repo, bundlerepo.bundlerepository):
        raise error.Abort(_('cannot use simple store with bundlerepo'))

    class simplestorerepo(repo.__class__):
        def file(self, f):
            return filestorage(self.svfs, f)

    repo.__class__ = simplestorerepo


def featuresetup(ui, supported):
    supported.add(REQUIREMENT)


def newreporequirements(orig, ui, createopts):
    """Modifies default requirements for new repos to use the simple store."""
    requirements = orig(ui, createopts)

    # These requirements are only used to affect creation of the store
    # object. We have our own store. So we can remove them.
    # TODO do this once we feel like taking the test hit.
    # if 'fncache' in requirements:
    #     requirements.remove('fncache')
    # if 'dotencode' in requirements:
    #     requirements.remove('dotencode')

    requirements.add(REQUIREMENT)

    return requirements


def makestore(orig, requirements, path, vfstype):
    if REQUIREMENT not in requirements:
        return orig(requirements, path, vfstype)

    return simplestore(path, vfstype)


def verifierinit(orig, self, *args, **kwargs):
    orig(self, *args, **kwargs)

    # We don't care that files in the store don't align with what is
    # advertised. So suppress these warnings.
    self.warnorphanstorefiles = False


def extsetup(ui):
    localrepo.featuresetupfuncs.add(featuresetup)

    extensions.wrapfunction(
        localrepo, 'newreporequirements', newreporequirements
    )
    extensions.wrapfunction(localrepo, 'makestore', makestore)
    extensions.wrapfunction(verify.verifier, '__init__', verifierinit)